diff --git a/.github/workflows/mac.yml b/.github/workflows/mac.yml index 5cc99e3e42eeb..e99f16afe700c 100644 --- a/.github/workflows/mac.yml +++ b/.github/workflows/mac.yml @@ -105,7 +105,7 @@ jobs: --use_xnnpack \ --use_binskim_compliant_compile_flags \ --ios \ - --apple_deploy_target=13.0 \ + --apple_deploy_target=15.1 \ --apple_sysroot=iphonesimulator \ --osx_arch=${{ matrix.target_arch }} diff --git a/.github/workflows/sca.yml b/.github/workflows/sca.yml deleted file mode 100644 index 51166293f06ac..0000000000000 --- a/.github/workflows/sca.yml +++ /dev/null @@ -1,177 +0,0 @@ -name: Windows_SCA -on: - push: - branches: - - main - - rel-* - pull_request: - -concurrency: - group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} - cancel-in-progress: true - -env: - AZCOPY_AUTO_LOGIN_TYPE: MSI - AZCOPY_MSI_CLIENT_ID: 63b63039-6328-442f-954b-5a64d124e5b4 - -jobs: - Onnxruntime-SCA-training-CUDA: - permissions: - security-events: write - runs-on: ["self-hosted", "1ES.Pool=onnxruntime-github-vs2022-mms"] - steps: - - uses: actions/checkout@v4 - with: - submodules: false - - uses: actions/setup-python@v5 - with: - python-version: '3.11.x' - architecture: 'x64' - - - uses: actions/setup-node@v4 - with: - node-version: 20 - - - name: Download cuda - run: azcopy.exe cp --recursive "https://lotusscus.blob.core.windows.net/models/cuda_sdk/v11.8" cuda_sdk - - - - name: Install ONNX - run: | - &tools\ci_build\github\windows\install_third_party_deps.ps1 -cpu_arch x64 -install_prefix D:\b\Debug\installed -build_config Debug - - # The build machine doesn't have a GPU. So the value of CMAKE_CUDA_ARCHITECTURES doesn't matter. - - name: Build code - env: - CAExcludePath: 'C:\Program Files;D:\b;${{ github.workspace }}\cmake' - run: python tools\ci_build\build.py --windows_sdk_version 10.0.22621.0 --enable_training --build_java --compile_no_warning_as_error --config Debug --build_dir D:\b --skip_submodule_sync --build_csharp --update --build --parallel --cmake_generator "Visual Studio 17 2022" --build_shared_lib --enable_pybind --cmake_extra_defines onnxruntime_USE_CUSTOM_STATIC_ANALYSIS_RULES=ON --cmake_extra_defines onnxruntime_ENABLE_STATIC_ANALYSIS=ON --cmake_extra_defines onnxruntime_REDIRECT_STATIC_ANALYSIS_OUTPUTS_TO_FILE=ON --use_cuda --cuda_home=${{ github.workspace }}\cuda_sdk\v11.8 --use_binskim_compliant_compile_flags --enable_cuda_profiling --cmake_extra_defines CMAKE_CUDA_ARCHITECTURES=75 - - - name: Generate sarif - working-directory: D:\b - run: npx @microsoft/sarif-multitool merge *.sarif --recurse --output-directory=${{ github.workspace }}\output --output-file=MergeResult.sarif --merge-runs && dir ${{ github.workspace }}\output - - - name: Upload SARIF to GitHub - uses: github/codeql-action/upload-sarif@v3 - continue-on-error: true - with: - sarif_file: ${{ github.workspace }}\output\MergeResult.sarif - category: VS_SCA - - # With WebGPU, Without python - Onnxruntime-SCA-win32-WebGPU-x64: - permissions: - security-events: write - runs-on: ["self-hosted", "1ES.Pool=onnxruntime-github-vs2022-mms"] - steps: - - uses: actions/checkout@v4 - with: - submodules: false - - uses: actions/setup-python@v5 - with: - python-version: '3.11.x' - architecture: 'x64' - - - uses: actions/setup-node@v4 - with: - node-version: 20 - - - name: Delete build folder - run: | - if (Test-Path D:\b) { Remove-Item -Recurse -Force D:\b } - - - - name: Build code - env: - CAExcludePath: 'C:\Program Files;D:\b;${{ github.workspace }}\cmake' - run: python tools\ci_build\build.py --compile_no_warning_as_error 
--config Debug --build_dir D:\b --skip_submodule_sync --update --build --parallel --cmake_generator "Visual Studio 17 2022" --build_shared_lib --cmake_extra_defines onnxruntime_USE_CUSTOM_STATIC_ANALYSIS_RULES=ON --cmake_extra_defines onnxruntime_ENABLE_STATIC_ANALYSIS=ON --cmake_extra_defines onnxruntime_REDIRECT_STATIC_ANALYSIS_OUTPUTS_TO_FILE=ON --use_webgpu - - - name: Generate sarif - working-directory: D:\b - run: npx @microsoft/sarif-multitool merge *.sarif --recurse --output-directory=${{ github.workspace }}\output --output-file=MergeResult.sarif --merge-runs && dir ${{ github.workspace }}\output - - - name: Upload SARIF to GitHub - uses: github/codeql-action/upload-sarif@v3 - continue-on-error: true - with: - sarif_file: ${{ github.workspace }}\output\MergeResult.sarif - category: VS_SCA_WIN32_WEBGPU_X64 - - # No python - Onnxruntime-SCA-win32-WINML-x64: - permissions: - security-events: write - runs-on: ["self-hosted", "1ES.Pool=onnxruntime-github-vs2022-mms"] - steps: - - uses: actions/checkout@v4 - with: - submodules: false - - uses: actions/setup-python@v5 - with: - python-version: '3.11.x' - architecture: 'x64' - - - uses: actions/setup-node@v4 - with: - node-version: 20 - - - name: Delete build folder - run: | - if (Test-Path D:\b) { Remove-Item -Recurse -Force D:\b } - &tools\ci_build\github\windows\install_third_party_deps.ps1 -cpu_arch x64 -install_prefix D:\b\Debug\installed -build_config Debug - - # The build machine doesn't have a GPU. So the value of CMAKE_CUDA_ARCHITECTURES doesn't matter. - - name: Build code - env: - CAExcludePath: 'C:\Program Files;D:\b;${{ github.workspace }}\cmake' - run: python tools\ci_build\build.py --build_java --compile_no_warning_as_error --config Debug --build_dir D:\b --skip_submodule_sync --build_csharp --update --build --parallel --cmake_generator "Visual Studio 17 2022" --build_shared_lib --cmake_extra_defines onnxruntime_USE_CUSTOM_STATIC_ANALYSIS_RULES=ON --cmake_extra_defines onnxruntime_ENABLE_STATIC_ANALYSIS=ON --cmake_extra_defines onnxruntime_REDIRECT_STATIC_ANALYSIS_OUTPUTS_TO_FILE=ON --ms_experimental --use_dml --use_winml --disable_rtti --enable_wcos --build_shared_lib - - - name: Generate sarif - working-directory: D:\b - run: npx @microsoft/sarif-multitool merge *.sarif --recurse --output-directory=${{ github.workspace }}\output --output-file=MergeResult.sarif --merge-runs && dir ${{ github.workspace }}\output - - - name: Upload SARIF to GitHub - uses: github/codeql-action/upload-sarif@v3 - continue-on-error: true - with: - sarif_file: ${{ github.workspace }}\output\MergeResult.sarif - category: VS_SCA_WIN32_WINML_X64 - - # No java, No python - Onnxruntime-SCA-win32-WINML-x86: - permissions: - security-events: write - runs-on: ["self-hosted", "1ES.Pool=onnxruntime-github-vs2022-mms"] - steps: - - uses: actions/checkout@v4 - with: - submodules: false - - uses: actions/setup-python@v5 - with: - python-version: '3.11.x' - architecture: 'x86' - - - uses: actions/setup-node@v4 - with: - node-version: 20 - - - name: Delete build folder - run: | - if (Test-Path D:\b) { Remove-Item -Recurse -Force D:\b } - &tools\ci_build\github\windows\install_third_party_deps.ps1 -cpu_arch x86 -install_prefix D:\b\Debug\installed -build_config Debug - - # The build machine doesn't have a GPU. So the value of CMAKE_CUDA_ARCHITECTURES doesn't matter. 
- - name: Build code - env: - CAExcludePath: 'C:\Program Files;D:\b;${{ github.workspace }}\cmake' - run: python tools\ci_build\build.py --compile_no_warning_as_error --config Debug --build_dir D:\b --skip_submodule_sync --build_csharp --update --build --parallel --cmake_generator "Visual Studio 17 2022" --build_shared_lib --cmake_extra_defines onnxruntime_USE_CUSTOM_STATIC_ANALYSIS_RULES=ON --cmake_extra_defines onnxruntime_ENABLE_STATIC_ANALYSIS=ON --cmake_extra_defines onnxruntime_REDIRECT_STATIC_ANALYSIS_OUTPUTS_TO_FILE=ON --ms_experimental --use_dml --use_winml --disable_rtti --enable_wcos --build_shared_lib - - - name: Generate sarif - working-directory: D:\b - run: npx @microsoft/sarif-multitool merge *.sarif --recurse --output-directory=${{ github.workspace }}\output --output-file=MergeResult.sarif --merge-runs && dir ${{ github.workspace }}\output - - - name: Upload SARIF to GitHub - uses: github/codeql-action/upload-sarif@v3 - continue-on-error: true - with: - sarif_file: ${{ github.workspace }}\output\MergeResult.sarif - category: VS_SCA_WIN32_WINML_X86 diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index 35bacb2b6bac6..5adfad63093bc 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -37,7 +37,7 @@ jobs: - name: Delete build folder run: | if (Test-Path D:\b) { Remove-Item -Recurse -Force D:\b } - &tools\ci_build\github\windows\install_third_party_deps.ps1 -cpu_arch x64 -install_prefix D:\b\Debug\installed -build_config Debug + # The build machine doesn't have a GPU. So the value of CMAKE_CUDA_ARCHITECTURES doesn't matter. - name: Build code diff --git a/.lintrunner.toml b/.lintrunner.toml index 5ef9ad9337f57..74744277fa1e3 100644 --- a/.lintrunner.toml +++ b/.lintrunner.toml @@ -61,7 +61,7 @@ is_formatter = true [[linter]] -code = 'BLACK-ISORT' +code = 'RUFF-FORMAT' include_patterns = [ '**/*.py', ] @@ -76,7 +76,7 @@ command = [ '-m', 'lintrunner_adapters', 'run', - 'black_isort_linter', + 'ruff_format_linter', '--', '@{{PATHSFILE}}' ] diff --git a/cgmanifests/generated/cgmanifest.json b/cgmanifests/generated/cgmanifest.json index ced418e0f4cc9..46f8c8891dda5 100644 --- a/cgmanifests/generated/cgmanifest.json +++ b/cgmanifests/generated/cgmanifest.json @@ -136,7 +136,7 @@ "component": { "type": "git", "git": { - "commitHash": "309b75c9e56e0a674bf78d59872ce131f814dfb6", + "commitHash": "fe98e0b93565382648129271381c14d6205255e3", "repositoryUrl": "https://github.com/google/XNNPACK.git" }, "comments": "googlexnnpack" @@ -226,8 +226,8 @@ "component": { "type": "git", "git": { - "commitHash": "4fe0e1e183925bf8cfa6aae24237e724a96479b8", - "repositoryUrl": "https://github.com/Maratyszcza/pthreadpool.git" + "commitHash": "4e80ca24521aa0fb3a746f9ea9c3eaa20e9afbb0", + "repositoryUrl": "https://github.com/google/pthreadpool.git" }, "comments": "pthreadpool" } @@ -246,7 +246,7 @@ "component": { "type": "git", "git": { - "commitHash": "ca678952a9a8eaa6de112d154e8e104b22f9ab3f", + "commitHash": "8a1772a0c5c447df2d18edf33ec4603a8c9c04a6", "repositoryUrl": "https://github.com/pytorch/cpuinfo.git" }, "comments": "pytorch_cpuinfo" @@ -306,7 +306,7 @@ "component": { "type": "git", "git": { - "commitHash": "94142d8391c9791ec71c38336436319a2d4ac7a0", + "commitHash": "f3f6caa6e8adb420e005ec41c6fefc8d75affb6e", "repositoryUrl": "https://github.com/microsoft/onnxruntime-extensions.git" }, "comments": "extensions" @@ -346,7 +346,7 @@ "component": { "type": "git", "git": { - "commitHash": "12a3b24c456cebd9fd11f23ac0164f78129b00c6", + 
"commitHash": "b9b4a37041dec3dd62ac92014a6cc1aece48d9f3", "repositoryUrl": "https://github.com/google/dawn.git" }, "comments": "dawn" diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index febefff6756e7..8650cc53d93ef 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -93,6 +93,7 @@ option(onnxruntime_USE_OPENVINO "Build with OpenVINO support" OFF) option(onnxruntime_USE_COREML "Build with CoreML support" OFF) option(onnxruntime_USE_NNAPI_BUILTIN "Build with builtin NNAPI lib for Android NNAPI support" OFF) option(onnxruntime_USE_QNN "Build with QNN support" OFF) +option(onnxruntime_BUILD_QNN_EP_STATIC_LIB "Build with QNN EP as a static library" OFF) option(onnxruntime_USE_SNPE "Build with SNPE support" OFF) option(onnxruntime_USE_RKNPU "Build with RKNPU support" OFF) option(onnxruntime_USE_DNNL "Build with DNNL support" OFF) @@ -258,6 +259,12 @@ option(onnxruntime_USE_AZURE "Build with azure inferencing support" OFF) option(onnxruntime_USE_LOCK_FREE_QUEUE "Build with lock-free task queue for threadpool." OFF) option(onnxruntime_FORCE_GENERIC_ALGORITHMS "Disable optimized arch-specific algorithms. Use only for testing and debugging generic algorithms." OFF) +option(onnxruntime_USE_TENSORRT_INTERFACE "Build ONNXRuntime shared lib which is compatible with TensorRT EP interface" OFF) +option(onnxruntime_USE_CUDA_INTERFACE "Build ONNXRuntime shared lib which is compatible with Cuda EP interface" OFF) +option(onnxruntime_USE_OPENVINO_INTERFACE "Build ONNXRuntime shared lib which is compatible with OpenVINO EP interface" OFF) +option(onnxruntime_USE_VITISAI_INTERFACE "Build ONNXRuntime shared lib which is compatible with Vitis-AI EP interface" OFF) +option(onnxruntime_USE_QNN_INTERFACE "Build ONNXRuntime shared lib which is compatible with QNN EP interface" OFF) + # ENABLE_TRAINING includes all training functionality # The following 2 entry points # 1. ORTModule @@ -371,6 +378,70 @@ if (onnxruntime_USE_ROCM) set(onnxruntime_HIPIFY_PERL ${HIPIFY_PERL_PATH}/hipify-perl) endif() + # replicate strategy used by pytorch to get ROCM_VERSION + # https://github.com/pytorch/pytorch/blob/1a10751731784942dcbb9c0524c1369a29d45244/cmake/public/LoadHIP.cmake#L45-L109 + # with modification + set(ROCM_INCLUDE_DIRS "${onnxruntime_ROCM_HOME}/include") + set(PROJECT_RANDOM_BINARY_DIR "${CMAKE_BINARY_DIR}") + set(file "${CMAKE_BINARY_DIR}/detect_rocm_version.cc") + + # Find ROCM version for checks + # ROCM 5.0 and later will have header api for version management + if(EXISTS ${ROCM_INCLUDE_DIRS}/rocm_version.h) + file(WRITE ${file} "" + "#include \n" + ) + elseif(EXISTS ${ROCM_INCLUDE_DIRS}/rocm-core/rocm_version.h) + file(WRITE ${file} "" + "#include \n" + ) + else() + message(FATAL_ERROR "********************* rocm_version.h couldnt be found ******************\n") + endif() + + file(APPEND ${file} "" + "#include \n" + + "#ifndef ROCM_VERSION_PATCH\n" + "#define ROCM_VERSION_PATCH 0\n" + "#endif\n" + "#define STRINGIFYHELPER(x) #x\n" + "#define STRINGIFY(x) STRINGIFYHELPER(x)\n" + "int main() {\n" + " printf(\"%d.%d.%s\", ROCM_VERSION_MAJOR, ROCM_VERSION_MINOR, STRINGIFY(ROCM_VERSION_PATCH));\n" + " return 0;\n" + "}\n" + ) + + try_run(run_result compile_result ${PROJECT_RANDOM_BINARY_DIR} ${file} + CMAKE_FLAGS "-DINCLUDE_DIRECTORIES=${ROCM_INCLUDE_DIRS}" + RUN_OUTPUT_VARIABLE rocm_version_from_header + COMPILE_OUTPUT_VARIABLE output_var + ) + # We expect the compile to be successful if the include directory exists. 
+ if(NOT compile_result) + message(FATAL_ERROR "ROCM: Couldn't determine version from header: " ${output_var}) + endif() + message(STATUS "ROCM: Header version is: " ${rocm_version_from_header}) + set(ROCM_VERSION_DEV_RAW ${rocm_version_from_header}) + + string(REGEX MATCH "^([0-9]+)\.([0-9]+)\.([0-9]+).*$" ROCM_VERSION_DEV_MATCH ${ROCM_VERSION_DEV_RAW}) + + if (ROCM_VERSION_DEV_MATCH) + set(ROCM_VERSION_DEV_MAJOR ${CMAKE_MATCH_1}) + set(ROCM_VERSION_DEV_MINOR ${CMAKE_MATCH_2}) + set(ROCM_VERSION_DEV_PATCH ${CMAKE_MATCH_3}) + set(ROCM_VERSION_DEV "${ROCM_VERSION_DEV_MAJOR}.${ROCM_VERSION_DEV_MINOR}.${ROCM_VERSION_DEV_PATCH}") + math(EXPR ROCM_VERSION_DEV_INT "(${ROCM_VERSION_DEV_MAJOR}*10000) + (${ROCM_VERSION_DEV_MINOR}*100) + ${ROCM_VERSION_DEV_PATCH}") + else() + message(FATAL_ERROR "Cannot determine ROCm version string") + endif() + message("\n***** ROCm version from rocm_version.h ****\n") + message("ROCM_VERSION_DEV: ${ROCM_VERSION_DEV}") + message("ROCM_VERSION_DEV_MAJOR: ${ROCM_VERSION_DEV_MAJOR}") + message("ROCM_VERSION_DEV_MINOR: ${ROCM_VERSION_DEV_MINOR}") + message("ROCM_VERSION_DEV_PATCH: ${ROCM_VERSION_DEV_PATCH}") + message("ROCM_VERSION_DEV_INT: ${ROCM_VERSION_DEV_INT}") message("\n***** HIP LANGUAGE CONFIG INFO ****\n") message("CMAKE_HIP_COMPILER: ${CMAKE_HIP_COMPILER}") message("CMAKE_HIP_ARCHITECTURES: ${CMAKE_HIP_ARCHITECTURES}") @@ -534,6 +605,11 @@ if (onnxruntime_EXTENDED_MINIMAL_BUILD AND NOT onnxruntime_MINIMAL_BUILD) set(onnxruntime_MINIMAL_BUILD ON) endif() +set(REPO_ROOT ${PROJECT_SOURCE_DIR}/..) +set(ONNXRUNTIME_ROOT ${PROJECT_SOURCE_DIR}/../onnxruntime) +set(ORTTRAINING_ROOT ${PROJECT_SOURCE_DIR}/../orttraining) +set(ORTTRAINING_SOURCE_DIR ${ORTTRAINING_ROOT}/orttraining) + include(adjust_global_compile_flags.cmake) if (APPLE) @@ -565,10 +641,6 @@ if (NOT MSVC AND NOT (CMAKE_SYSTEM_NAME STREQUAL "iOS")) set(CMAKE_REQUIRED_FLAGS ${OLD_CMAKE_REQUIRED_FLAGS}) endif() -set(REPO_ROOT ${PROJECT_SOURCE_DIR}/..) -set(ONNXRUNTIME_ROOT ${PROJECT_SOURCE_DIR}/../onnxruntime) -set(ORTTRAINING_ROOT ${PROJECT_SOURCE_DIR}/../orttraining) -set(ORTTRAINING_SOURCE_DIR ${ORTTRAINING_ROOT}/orttraining) file (STRINGS "${REPO_ROOT}/VERSION_NUMBER" ORT_VERSION) @@ -637,7 +709,7 @@ if (WIN32) # structure was padded due to __declspec(align()) list(APPEND ORT_WARNING_FLAGS "/wd4324") # warning C4800: Implicit conversion from 'X' to bool. 
Possible information loss - if (onnxruntime_USE_OPENVINO) + if (onnxruntime_USE_OPENVINO OR onnxruntime_USE_OPENVINO_INTERFACE) list(APPEND ORT_WARNING_FLAGS "/wd4800") endif() # operator 'operator-name': deprecated between enumerations of different types @@ -679,6 +751,7 @@ else() check_cxx_compiler_flag(-Wdeprecated-builtins HAS_DEPRECATED_BUILTINS) check_cxx_compiler_flag(-Wdeprecated-copy HAS_DEPRECATED_COPY) check_cxx_compiler_flag(-Wdeprecated-declarations HAS_DEPRECATED_DECLARATIONS) + check_cxx_compiler_flag(-Wdeprecated-literal-operator HAS_DEPRECATED_LITERAL_OPERATOR) check_cxx_compiler_flag(-Wdeprecated-this-capture HAS_DEPRECATED_THIS_CAPTURE) check_cxx_compiler_flag(-Wenum-constexpr-conversion HAS_ENUM_CONSTEXPR_CONVERSION) check_cxx_compiler_flag(-Wformat-truncation HAS_FORMAT_TRUNCATION) @@ -735,6 +808,9 @@ else() if (HAS_DEPRECATED_BUILTINS) list(APPEND ORT_WARNING_FLAGS -Wno-deprecated-builtins) endif() + if (HAS_DEPRECATED_LITERAL_OPERATOR) + list(APPEND ORT_WARNING_FLAGS -Wno-deprecated-literal-operator) + endif() #see:https://reviews.llvm.org/D131307 #It was intended that the 'enum-constexpr-conversion' type warnings can not be silenced by -w if(HAS_ENUM_CONSTEXPR_CONVERSION AND NOT Protobuf_FOUND) @@ -794,7 +870,7 @@ else() set(onnxruntime_USE_MEMORY_EFFICIENT_ATTENTION OFF) endif() -if (onnxruntime_USE_CUDA) +if (onnxruntime_USE_CUDA OR onnxruntime_USE_CUDA_INTERFACE) list(APPEND ORT_PROVIDER_FLAGS -DUSE_CUDA=1) list(APPEND ORT_PROVIDER_CMAKE_FLAGS -Donnxruntime_USE_CUDA=1) list(APPEND ONNXRUNTIME_PROVIDER_NAMES cuda) @@ -818,7 +894,7 @@ if (onnxruntime_USE_CUDA) endif() endif() -if (onnxruntime_USE_VITISAI) +if (onnxruntime_USE_VITISAI OR onnxruntime_USE_VITISAI_INTERFACE) list(APPEND ORT_PROVIDER_FLAGS -DUSE_VITISAI=1) list(APPEND ORT_PROVIDER_CMAKE_FLAGS -Donnxruntime_USE_VITISAI=1) list(APPEND ONNXRUNTIME_PROVIDER_NAMES vitisai) @@ -828,12 +904,12 @@ if (onnxruntime_USE_DNNL) list(APPEND ONNXRUNTIME_PROVIDER_NAMES dnnl) list(APPEND ORT_PROVIDER_CMAKE_FLAGS -Donnxruntime_USE_DNNL=1) endif() -if (onnxruntime_USE_OPENVINO) +if (onnxruntime_USE_OPENVINO OR onnxruntime_USE_OPENVINO_INTERFACE) list(APPEND ORT_PROVIDER_FLAGS -DUSE_OPENVINO=1) list(APPEND ONNXRUNTIME_PROVIDER_NAMES openvino) list(APPEND ORT_PROVIDER_CMAKE_FLAGS -Donnxruntime_USE_OPENVINO=1) endif() -if (onnxruntime_USE_TENSORRT) +if (onnxruntime_USE_TENSORRT OR onnxruntime_USE_TENSORRT_INTERFACE) list(APPEND ORT_PROVIDER_FLAGS -DUSE_TENSORRT=1) #TODO: remove the following line and change the test code in onnxruntime_shared_lib_test to use the new EP API. 
list(APPEND ONNXRUNTIME_PROVIDER_NAMES tensorrt) @@ -859,7 +935,7 @@ if (onnxruntime_USE_JSEP) list(APPEND ORT_PROVIDER_CMAKE_FLAGS -Donnxruntime_USE_JSEP=1) list(APPEND ONNXRUNTIME_PROVIDER_NAMES js) endif() -if (onnxruntime_USE_QNN) +if (onnxruntime_USE_QNN OR onnxruntime_USE_QNN_INTERFACE) list(APPEND ORT_PROVIDER_FLAGS -DUSE_QNN=1) list(APPEND ORT_PROVIDER_CMAKE_FLAGS -Donnxruntime_USE_QNN=1) list(APPEND ONNXRUNTIME_PROVIDER_NAMES qnn) @@ -887,7 +963,7 @@ if (onnxruntime_USE_QNN) endif() endif() - if (MSVC OR ${CMAKE_SYSTEM_NAME} STREQUAL "Linux") + if ((NOT onnxruntime_USE_QNN_INTERFACE) AND (MSVC OR ${CMAKE_SYSTEM_NAME} STREQUAL "Linux")) file(GLOB QNN_LIB_FILES LIST_DIRECTORIES false "${onnxruntime_QNN_HOME}/lib/${QNN_ARCH_ABI}/libQnn*.so" "${onnxruntime_QNN_HOME}/lib/${QNN_ARCH_ABI}/Qnn*.dll" "${onnxruntime_QNN_HOME}/lib/${QNN_ARCH_ABI}/libHtpPrepare.so" @@ -1138,6 +1214,8 @@ function(onnxruntime_set_compile_flags target_name) # because we may mix gcc and hipclang set(ORT_HIP_WARNING_FLAGS ${ORT_WARNING_FLAGS}) list(REMOVE_ITEM ORT_HIP_WARNING_FLAGS -Wno-nonnull-compare) + # Unsupported by Clang 18 yet. + list(REMOVE_ITEM ORT_HIP_WARNING_FLAGS -Wno-dangling-reference) # float16.h:90:12: error: ‘tmp’ is used uninitialized list(APPEND ORT_HIP_WARNING_FLAGS -Wno-uninitialized) @@ -1344,7 +1422,7 @@ if (onnxruntime_ENABLE_TRAINING_APIS) ) endif() -if (onnxruntime_USE_OPENVINO) +if (onnxruntime_USE_OPENVINO OR onnxruntime_USE_OPENVINO_INTERFACE) add_definitions(-DUSE_OPENVINO=1) @@ -1357,7 +1435,7 @@ if (onnxruntime_USE_OPENVINO) add_definitions(-DOPENVINO_CONFIG_GPU=1) endif() - if (onnxruntime_USE_OPENVINO_CPU) + if (onnxruntime_USE_OPENVINO_CPU OR onnxruntime_USE_OPENVINO_INTERFACE) # OpenVino CPU interface is default built. add_definitions(-DOPENVINO_CONFIG_CPU=1) endif() diff --git a/cmake/adjust_global_compile_flags.cmake b/cmake/adjust_global_compile_flags.cmake index 8325700b423f7..8b5a744e497a6 100644 --- a/cmake/adjust_global_compile_flags.cmake +++ b/cmake/adjust_global_compile_flags.cmake @@ -12,15 +12,6 @@ if (ANDROID) set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,-z,max-page-size=16384") endif() -# Suggested by https://gitlab.kitware.com/cmake/cmake/-/issues/20132 -# MacCatalyst is not well supported in CMake -# The error that can emerge without this flag can look like: -# "clang : error : overriding '-mmacosx-version-min=11.0' option with '-target x86_64-apple-ios14.0-macabi' [-Werror,-Woverriding-t-option]" -if (PLATFORM_NAME STREQUAL "macabi") - add_compile_options(-Wno-overriding-t-option) - add_link_options(-Wno-overriding-t-option) -endif() - # Enable space optimization for gcc/clang # Cannot use "-ffunction-sections -fdata-sections" if we enable bitcode (iOS) if (NOT MSVC AND NOT onnxruntime_ENABLE_BITCODE) @@ -378,3 +369,7 @@ if (WIN32) elseif(LINUX) add_compile_definitions("_GNU_SOURCE") endif() + +if (onnxruntime_USE_EXTENSIONS) + include_directories(${REPO_ROOT}/include/onnxruntime/core/session) +endif() \ No newline at end of file diff --git a/cmake/deps.txt b/cmake/deps.txt index cb5a5910fb3d0..c73d9a4e3532f 100644 --- a/cmake/deps.txt +++ b/cmake/deps.txt @@ -29,7 +29,7 @@ fxdiv;https://github.com/Maratyszcza/FXdiv/archive/63058eff77e11aa15bf531df5dd34 google_benchmark;https://github.com/google/benchmark/archive/refs/tags/v1.8.5.zip;cd47d3d272faf353600c8cc2fdec2b52d6f69177 googletest;https://github.com/google/googletest/archive/refs/tags/v1.15.0.zip;9d2d0af8d77ac726ea55d44a8fa727ec98311349 #xnnpack 2024.09.04 
-googlexnnpack;https://github.com/google/XNNPACK/archive/309b75c9e56e0a674bf78d59872ce131f814dfb6.zip;39FA5259EAEACE0547284B63D5CEDC4F05553F5A +googlexnnpack;https://github.com/google/XNNPACK/archive/fe98e0b93565382648129271381c14d6205255e3.zip;14f61dcf17cec2cde34ba2dcf61d6f24bf6059f3 json;https://github.com/nlohmann/json/archive/refs/tags/v3.10.5.zip;f257f8dc27c5b8c085dc887b40cddd18ae1f725c microsoft_gsl;https://github.com/microsoft/GSL/archive/refs/tags/v4.0.0.zip;cf368104cd22a87b4dd0c80228919bb2df3e2a14 microsoft_wil;https://github.com/microsoft/wil/archive/refs/tags/v1.0.230629.1.zip;e4a542a323c070376f7c2d1973d0f7ddbc1d2fa5 @@ -46,17 +46,17 @@ protoc_linux_x86;https://github.com/protocolbuffers/protobuf/releases/download/v protoc_linux_aarch64;https://github.com/protocolbuffers/protobuf/releases/download/v21.12/protoc-21.12-linux-aarch_64.zip;df9d45470b0b8cf939dd2f0ec6b88e9cafc4d617 protoc_mac_universal;https://github.com/protocolbuffers/protobuf/releases/download/v21.12/protoc-21.12-osx-universal_binary.zip;23710c3d1c2036d8d65a6a22234372fa2d7af9ef psimd;https://github.com/Maratyszcza/psimd/archive/072586a71b55b7f8c584153d223e95687148a900.zip;1f5454b01f06f9656b77e4a5e2e31d7422487013 -pthreadpool;https://github.com/Maratyszcza/pthreadpool/archive/4fe0e1e183925bf8cfa6aae24237e724a96479b8.zip;07a0aa91dd9bf86f31b95497e00f31d8a261a4bd +pthreadpool;https://github.com/google/pthreadpool/archive/4e80ca24521aa0fb3a746f9ea9c3eaa20e9afbb0.zip;bd4ea65c8292801e9555b527a0ecbb2e0092c917 pybind11;https://github.com/pybind/pybind11/archive/refs/tags/v2.13.1.zip;9255d5c8568debcc329dd42ed8f410ee139ac7b1 -pytorch_cpuinfo;https://github.com/pytorch/cpuinfo/archive/ca678952a9a8eaa6de112d154e8e104b22f9ab3f.zip;138bf57d2a110935330d1048dce6d7b82d17d377 +pytorch_cpuinfo;https://github.com/pytorch/cpuinfo/archive/8a1772a0c5c447df2d18edf33ec4603a8c9c04a6.zip;85bf8a60dae026b99b6ccd78606c85ed83bfb2cd re2;https://github.com/google/re2/archive/refs/tags/2024-07-02.zip;646e1728269cde7fcef990bf4a8e87b047882e88 safeint;https://github.com/dcleblanc/SafeInt/archive/refs/tags/3.0.28.zip;23f252040ff6cb9f1fd18575b32fa8fb5928daac tensorboard;https://github.com/tensorflow/tensorboard/archive/373eb09e4c5d2b3cc2493f0949dc4be6b6a45e81.zip;67b833913605a4f3f499894ab11528a702c2b381 cutlass;https://github.com/NVIDIA/cutlass/archive/refs/tags/v3.5.1.zip;e49b2b964163d27765a5002d210a2f3c73771835 utf8_range;https://github.com/protocolbuffers/utf8_range/archive/72c943dea2b9240cd09efde15191e144bc7c7d38.zip;9925739c9debc0efa2adcb194d371a35b6a03156 -extensions;https://github.com/microsoft/onnxruntime-extensions/archive/94142d8391c9791ec71c38336436319a2d4ac7a0.zip;4365ac5140338b4cb75a39944a4be276e3829b3c +extensions;https://github.com/microsoft/onnxruntime-extensions/archive/f3f6caa6e8adb420e005ec41c6fefc8d75affb6e.zip;cec2e164f1a00e7d80fd94df65e4e8d2daead70d composable_kernel;https://github.com/ROCmSoftwarePlatform/composable_kernel/archive/204da9c522cebec5220bba52cd3542ebcaf99e7a.zip;1827348efd47831c13074245274d41b7cae8a557 directx_headers;https://github.com/microsoft/DirectX-Headers/archive/refs/tags/v1.613.1.zip;47653509a3371eabb156360f42faf582f314bf2e cudnn_frontend;https://github.com/NVIDIA/cudnn-frontend/archive/refs/tags/v1.7.0.zip;d0753d8d5b39947ca0729d7773cb84653a129eb1 -dawn;https://github.com/google/dawn/archive/12a3b24c456cebd9fd11f23ac0164f78129b00c6.zip;ad428f6dc16f1336d584f7bad5714e1097dafc43 -kleidiai;https://gitlab.arm.com/kleidi/kleidiai/-/archive/v0.2.0/kleidiai-v0.2.0.zip;B1E3173992FD91F20DB904AB77D6E901778C2681 
+dawn;https://github.com/google/dawn/archive/b9b4a37041dec3dd62ac92014a6cc1aece48d9f3.zip;e8b8c2ebabdedb7c57d931fc4a19ae22146d31e1 +kleidiai;https://gitlab.arm.com/kleidi/kleidiai/-/archive/d15722976120710080ca098fe8ddabf4556cb40f/kleidiai-d15722976120710080ca098fe8ddabf4556cb40f.zip;d6c840d00c3b05aedf06e957ddaece1013d1f40b diff --git a/cmake/external/abseil-cpp.cmake b/cmake/external/abseil-cpp.cmake index 7b6e2141eeb1b..6b4404a124926 100644 --- a/cmake/external/abseil-cpp.cmake +++ b/cmake/external/abseil-cpp.cmake @@ -32,7 +32,13 @@ FetchContent_Declare( onnxruntime_fetchcontent_makeavailable(abseil_cpp) FetchContent_GetProperties(abseil_cpp) -set(ABSEIL_SOURCE_DIR ${abseil_cpp_SOURCE_DIR}) +if(abseil_cpp_SOURCE_DIR) + set(ABSEIL_SOURCE_DIR ${abseil_cpp_SOURCE_DIR}) + if(onnxruntime_USE_WEBGPU) + set(DAWN_ABSEIL_DIR ${abseil_cpp_SOURCE_DIR}) + endif() +endif() + # abseil_cpp_SOURCE_DIR is non-empty if we build it from source message(STATUS "Abseil source dir:" ${ABSEIL_SOURCE_DIR}) # abseil_cpp_VERSION is non-empty if we find a preinstalled ABSL diff --git a/cmake/external/onnxruntime_external_deps.cmake b/cmake/external/onnxruntime_external_deps.cmake index aeaaa7b51d595..e95656969866f 100644 --- a/cmake/external/onnxruntime_external_deps.cmake +++ b/cmake/external/onnxruntime_external_deps.cmake @@ -222,6 +222,11 @@ onnxruntime_fetchcontent_makeavailable(Protobuf) if(Protobuf_FOUND) message(STATUS "Protobuf version: ${Protobuf_VERSION}") else() + if(protobuf_SOURCE_DIR) + if(onnxruntime_USE_WEBGPU) + set(DAWN_PROTOBUF_DIR ${protobuf_SOURCE_DIR}) + endif() + endif() # Adjust warning flags if (TARGET libprotoc) if (NOT MSVC) @@ -522,12 +527,7 @@ if(TARGET ONNX::onnx_proto AND NOT TARGET onnx_proto) add_library(onnx_proto ALIAS ONNX::onnx_proto) endif() -find_package(Eigen3 CONFIG) -if(Eigen3_FOUND) - get_target_property(eigen_INCLUDE_DIRS Eigen3::Eigen INTERFACE_INCLUDE_DIRECTORIES) -else() - include(eigen) # FetchContent -endif() +include(external/eigen.cmake) if(onnxruntime_USE_VCPKG) find_package(wil CONFIG REQUIRED) @@ -635,74 +635,95 @@ if (onnxruntime_USE_WEBGPU) ) endif() - if (onnxruntime_BUILD_DAWN_MONOLITHIC_LIBRARY) - set(DAWN_BUILD_MONOLITHIC_LIBRARY ON CACHE BOOL "" FORCE) - set(DAWN_ENABLE_INSTALL ON CACHE BOOL "" FORCE) - - if (onnxruntime_USE_EXTERNAL_DAWN) - message(FATAL_ERROR "onnxruntime_USE_EXTERNAL_DAWN and onnxruntime_BUILD_DAWN_MONOLITHIC_LIBRARY cannot be enabled at the same time.") - endif() - else() - # use dawn::dawn_native and dawn::dawn_proc instead of the monolithic dawn::webgpu_dawn to minimize binary size - set(DAWN_BUILD_MONOLITHIC_LIBRARY OFF CACHE BOOL "" FORCE) - set(DAWN_ENABLE_INSTALL OFF CACHE BOOL "" FORCE) - endif() set(DAWN_BUILD_SAMPLES OFF CACHE BOOL "" FORCE) set(DAWN_ENABLE_NULL OFF CACHE BOOL "" FORCE) set(DAWN_FETCH_DEPENDENCIES ON CACHE BOOL "" FORCE) - # disable things we don't use - set(DAWN_DXC_ENABLE_ASSERTS_IN_NDEBUG OFF) - set(DAWN_ENABLE_DESKTOP_GL OFF CACHE BOOL "" FORCE) - set(DAWN_ENABLE_OPENGLES OFF CACHE BOOL "" FORCE) - set(DAWN_SUPPORTS_GLFW_FOR_WINDOWING OFF CACHE BOOL "" FORCE) - set(DAWN_USE_GLFW OFF CACHE BOOL "" FORCE) - set(DAWN_USE_WINDOWS_UI OFF CACHE BOOL "" FORCE) - set(DAWN_USE_X11 OFF CACHE BOOL "" FORCE) - - set(TINT_BUILD_TESTS OFF CACHE BOOL "" FORCE) - set(TINT_BUILD_CMD_TOOLS OFF CACHE BOOL "" FORCE) - set(TINT_BUILD_GLSL_WRITER OFF CACHE BOOL "" FORCE) - set(TINT_BUILD_GLSL_VALIDATOR OFF CACHE BOOL "" FORCE) - set(TINT_BUILD_IR_BINARY OFF CACHE BOOL "" FORCE) - set(TINT_BUILD_SPV_READER OFF CACHE BOOL "" 
FORCE) # don't need. disabling is a large binary size saving - set(TINT_BUILD_WGSL_WRITER ON CACHE BOOL "" FORCE) # needed to create cache key. runtime error if not enabled. - - # SPIR-V validation shouldn't be required given we're using Tint to create the SPIR-V. - set(DAWN_ENABLE_SPIRV_VALIDATION OFF CACHE BOOL "" FORCE) - - if (WIN32) - # building this requires the HLSL writer to be enabled in Tint. TBD if that we need either of these to be ON. - set(DAWN_USE_BUILT_DXC ON CACHE BOOL "" FORCE) - set(TINT_BUILD_HLSL_WRITER ON CACHE BOOL "" FORCE) - - if ((NOT onnxruntime_ENABLE_DAWN_BACKEND_VULKAN) AND (NOT onnxruntime_ENABLE_DAWN_BACKEND_D3D12)) - message(FATAL_ERROR "At least one of onnxruntime_ENABLE_DAWN_BACKEND_VULKAN or onnxruntime_ENABLE_DAWN_BACKEND_D3D12 must be enabled when using Dawn on Windows.") - endif() - if (onnxruntime_ENABLE_DAWN_BACKEND_VULKAN) - set(DAWN_ENABLE_VULKAN ON CACHE BOOL "" FORCE) - set(TINT_BUILD_SPV_WRITER ON CACHE BOOL "" FORCE) + if (CMAKE_SYSTEM_NAME STREQUAL "Emscripten") + set(DAWN_EMSCRIPTEN_TOOLCHAIN "${REPO_ROOT}/cmake/external/emsdk/upstream/emscripten" CACHE STRING "" FORCE) + + # Add the missing files from the emsdk installation + # + # For a "standard" emscripten build, the folder "${DAWN_EMSCRIPTEN_TOOLCHAIN}/tools/maint/" is not used. This is the + # reason why EMSDK installation does not include it. + # However, currently the WebGPU support in Emscripten is still being developed and the Dawn project is maintaining + # a fork of the Emscripten toolchain. As an extra build step, Dawn needs to generate some files using the file + # "${DAWN_EMSCRIPTEN_TOOLCHAIN}/tools/maint/gen_struct_info.py" from emscripten, which is missing in the emscripten + # installed by emsdk. + # + # We keep a copy of the missing file(s) in ${PROJECT_SOURCE_DIR}/patches/emscripten/, and now we extract them to the + # emscripten toolchain folder. 
+ execute_process(COMMAND ${CMAKE_COMMAND} -E tar x "${PROJECT_SOURCE_DIR}/patches/emscripten/patch_3.1.74.tgz" + WORKING_DIRECTORY ${DAWN_EMSCRIPTEN_TOOLCHAIN}) + else() + if (onnxruntime_BUILD_DAWN_MONOLITHIC_LIBRARY) + set(DAWN_BUILD_MONOLITHIC_LIBRARY ON CACHE BOOL "" FORCE) + set(DAWN_ENABLE_INSTALL ON CACHE BOOL "" FORCE) + + if (onnxruntime_USE_EXTERNAL_DAWN) + message(FATAL_ERROR "onnxruntime_USE_EXTERNAL_DAWN and onnxruntime_BUILD_DAWN_MONOLITHIC_LIBRARY cannot be enabled at the same time.") + endif() else() - set(DAWN_ENABLE_VULKAN OFF CACHE BOOL "" FORCE) + # use dawn::dawn_native and dawn::dawn_proc instead of the monolithic dawn::webgpu_dawn to minimize binary size + set(DAWN_BUILD_MONOLITHIC_LIBRARY OFF CACHE BOOL "" FORCE) + set(DAWN_ENABLE_INSTALL OFF CACHE BOOL "" FORCE) endif() - if (onnxruntime_ENABLE_DAWN_BACKEND_D3D12) - set(DAWN_ENABLE_D3D12 ON CACHE BOOL "" FORCE) - else() - set(DAWN_ENABLE_D3D12 OFF CACHE BOOL "" FORCE) + + # disable things we don't use + set(DAWN_DXC_ENABLE_ASSERTS_IN_NDEBUG OFF) + set(DAWN_ENABLE_DESKTOP_GL OFF CACHE BOOL "" FORCE) + set(DAWN_ENABLE_OPENGLES OFF CACHE BOOL "" FORCE) + set(DAWN_SUPPORTS_GLFW_FOR_WINDOWING OFF CACHE BOOL "" FORCE) + set(DAWN_USE_GLFW OFF CACHE BOOL "" FORCE) + set(DAWN_USE_WINDOWS_UI OFF CACHE BOOL "" FORCE) + set(DAWN_USE_X11 OFF CACHE BOOL "" FORCE) + + set(TINT_BUILD_TESTS OFF CACHE BOOL "" FORCE) + set(TINT_BUILD_CMD_TOOLS OFF CACHE BOOL "" FORCE) + set(TINT_BUILD_GLSL_WRITER OFF CACHE BOOL "" FORCE) + set(TINT_BUILD_GLSL_VALIDATOR OFF CACHE BOOL "" FORCE) + set(TINT_BUILD_IR_BINARY OFF CACHE BOOL "" FORCE) + set(TINT_BUILD_SPV_READER OFF CACHE BOOL "" FORCE) # don't need. disabling is a large binary size saving + set(TINT_BUILD_WGSL_WRITER ON CACHE BOOL "" FORCE) # needed to create cache key. runtime error if not enabled. + + # SPIR-V validation shouldn't be required given we're using Tint to create the SPIR-V. + set(DAWN_ENABLE_SPIRV_VALIDATION OFF CACHE BOOL "" FORCE) + + if (WIN32) + # building this requires the HLSL writer to be enabled in Tint. TBD if that we need either of these to be ON. + set(DAWN_USE_BUILT_DXC ON CACHE BOOL "" FORCE) + set(TINT_BUILD_HLSL_WRITER ON CACHE BOOL "" FORCE) + + if ((NOT onnxruntime_ENABLE_DAWN_BACKEND_VULKAN) AND (NOT onnxruntime_ENABLE_DAWN_BACKEND_D3D12)) + message(FATAL_ERROR "At least one of onnxruntime_ENABLE_DAWN_BACKEND_VULKAN or onnxruntime_ENABLE_DAWN_BACKEND_D3D12 must be enabled when using Dawn on Windows.") + endif() + if (onnxruntime_ENABLE_DAWN_BACKEND_VULKAN) + set(DAWN_ENABLE_VULKAN ON CACHE BOOL "" FORCE) + set(TINT_BUILD_SPV_WRITER ON CACHE BOOL "" FORCE) + else() + set(DAWN_ENABLE_VULKAN OFF CACHE BOOL "" FORCE) + endif() + if (onnxruntime_ENABLE_DAWN_BACKEND_D3D12) + set(DAWN_ENABLE_D3D12 ON CACHE BOOL "" FORCE) + else() + set(DAWN_ENABLE_D3D12 OFF CACHE BOOL "" FORCE) + endif() + # We are currently always using the D3D12 backend. + set(DAWN_ENABLE_D3D11 OFF CACHE BOOL "" FORCE) endif() - # We are currently always using the D3D12 backend. 
- set(DAWN_ENABLE_D3D11 OFF CACHE BOOL "" FORCE) endif() onnxruntime_fetchcontent_makeavailable(dawn) - if (onnxruntime_BUILD_DAWN_MONOLITHIC_LIBRARY) - list(APPEND onnxruntime_EXTERNAL_LIBRARIES dawn::webgpu_dawn) - else() - if (NOT onnxruntime_USE_EXTERNAL_DAWN) - list(APPEND onnxruntime_EXTERNAL_LIBRARIES dawn::dawn_native) + if (NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten") + if (onnxruntime_BUILD_DAWN_MONOLITHIC_LIBRARY) + list(APPEND onnxruntime_EXTERNAL_LIBRARIES dawn::webgpu_dawn) + else() + if (NOT onnxruntime_USE_EXTERNAL_DAWN) + list(APPEND onnxruntime_EXTERNAL_LIBRARIES dawn::dawn_native) + endif() + list(APPEND onnxruntime_EXTERNAL_LIBRARIES dawn::dawn_proc) endif() - list(APPEND onnxruntime_EXTERNAL_LIBRARIES dawn::dawn_proc) endif() endif() diff --git a/cmake/external/xnnpack.cmake b/cmake/external/xnnpack.cmake index 3298c078b592a..0be2c7082bcc8 100644 --- a/cmake/external/xnnpack.cmake +++ b/cmake/external/xnnpack.cmake @@ -1,8 +1,7 @@ set(XNNPACK_USE_SYSTEM_LIBS ON CACHE INTERNAL "") set(XNNPACK_BUILD_TESTS OFF CACHE INTERNAL "") set(XNNPACK_BUILD_BENCHMARKS OFF CACHE INTERNAL "") -set(FP16_BUILD_TESTS OFF CACHE INTERNAL "") -set(FP16_BUILD_BENCHMARKS OFF CACHE INTERNAL "") + set(PTHREADPOOL_BUILD_TESTS OFF CACHE INTERNAL "") set(PTHREADPOOL_BUILD_BENCHMARKS OFF CACHE INTERNAL "") set(KLEIDIAI_BUILD_TESTS OFF CACHE INTERNAL "") @@ -17,44 +16,6 @@ if(CMAKE_ANDROID_ARCH_ABI STREQUAL armeabi-v7a) set(XNNPACK_ENABLE_ARM_BF16 OFF) endif() -# fp16 depends on psimd -FetchContent_Declare(psimd URL ${DEP_URL_psimd} URL_HASH SHA1=${DEP_SHA1_psimd}) -onnxruntime_fetchcontent_makeavailable(psimd) -set(PSIMD_SOURCE_DIR ${psimd_SOURCE_DIR}) - -block(PROPAGATE fp16_PATCH_COMMAND) - # only apply fp16 patch for Apple x86_64 targets - - if(APPLE) - if(NOT "${CMAKE_OSX_ARCHITECTURES}" STREQUAL "") - if ("x86_64" IN_LIST CMAKE_OSX_ARCHITECTURES) - set(fp16_PATCH_REQUIRED 1) - endif() - else() - # CMAKE_OSX_ARCHITECTURES unspecified, check host - if(CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") - set(fp16_PATCH_REQUIRED 1) - endif() - endif() - endif() - - if(fp16_PATCH_REQUIRED) - message(STATUS "Applying fp16 patch.") - set(fp16_PATCH_FILE ${PROJECT_SOURCE_DIR}/patches/fp16/remove_math_h_dependency_from_fp16_h.patch) - set(fp16_PATCH_COMMAND ${Patch_EXECUTABLE} --binary --ignore-whitespace -p1 < ${fp16_PATCH_FILE}) - else() - set(fp16_PATCH_COMMAND "") - endif() -endblock() - -FetchContent_Declare( - fp16 - URL ${DEP_URL_fp16} - URL_HASH SHA1=${DEP_SHA1_fp16} - PATCH_COMMAND ${fp16_PATCH_COMMAND} - ) -onnxruntime_fetchcontent_makeavailable(fp16) - # pthreadpool depends on fxdiv FetchContent_Declare(fxdiv URL ${DEP_URL_fxdiv} URL_HASH SHA1=${DEP_SHA1_fxdiv}) onnxruntime_fetchcontent_makeavailable(fxdiv) diff --git a/cmake/onnxruntime.cmake b/cmake/onnxruntime.cmake index d72b61a0859b2..78edb4179fafd 100644 --- a/cmake/onnxruntime.cmake +++ b/cmake/onnxruntime.cmake @@ -199,17 +199,12 @@ if(CMAKE_SYSTEM_NAME STREQUAL "Android" AND onnxruntime_BUILD_JAVA) endforeach() endif() -# This list is a reversed topological ordering of library dependencies. -# Earlier entries may depend on later ones. Later ones should not depend on earlier ones. 
-set(onnxruntime_INTERNAL_LIBRARIES - onnxruntime_session - ${onnxruntime_libs} +set(onnxruntime_INTERNAL_PROVIDER_LIBRARIES ${PROVIDERS_ACL} ${PROVIDERS_ARMNN} ${PROVIDERS_COREML} ${PROVIDERS_DML} ${PROVIDERS_NNAPI} - ${PROVIDERS_QNN} ${PROVIDERS_SNPE} ${PROVIDERS_RKNPU} ${PROVIDERS_VSINPU} @@ -218,6 +213,18 @@ set(onnxruntime_INTERNAL_LIBRARIES ${PROVIDERS_WEBNN} ${PROVIDERS_AZURE} ${PROVIDERS_INTERNAL_TESTING} +) + +if (onnxruntime_BUILD_QNN_EP_STATIC_LIB) + list(APPEND onnxruntime_INTERNAL_PROVIDER_LIBRARIES onnxruntime_providers_qnn) +endif() + +# This list is a reversed topological ordering of library dependencies. +# Earlier entries may depend on later ones. Later ones should not depend on earlier ones. +set(onnxruntime_INTERNAL_LIBRARIES + onnxruntime_session + ${onnxruntime_libs} + ${onnxruntime_INTERNAL_PROVIDER_LIBRARIES} ${onnxruntime_winml} onnxruntime_optimizer onnxruntime_providers diff --git a/cmake/onnxruntime_config.h.in b/cmake/onnxruntime_config.h.in index bbddefe531cb8..f82a23bf4026b 100644 --- a/cmake/onnxruntime_config.h.in +++ b/cmake/onnxruntime_config.h.in @@ -9,6 +9,7 @@ #cmakedefine HAS_CLASS_MEMACCESS #cmakedefine HAS_DEPRECATED_COPY #cmakedefine HAS_DEPRECATED_DECLARATIONS +#cmakedefine HAS_DEPRECATED_LITERAL_OPERATOR #cmakedefine HAS_DEPRECATED_THIS_CAPTURE #cmakedefine HAS_FORMAT_TRUNCATION #cmakedefine HAS_IGNORED_ATTRIBUTES diff --git a/cmake/onnxruntime_java.cmake b/cmake/onnxruntime_java.cmake index b15b9632e9e24..1227264e595ed 100644 --- a/cmake/onnxruntime_java.cmake +++ b/cmake/onnxruntime_java.cmake @@ -148,7 +148,7 @@ if (WIN32) if(NOT onnxruntime_ENABLE_STATIC_ANALYSIS) add_custom_command(TARGET onnxruntime4j_jni POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy_if_different $ ${JAVA_PACKAGE_LIB_DIR}/$) add_custom_command(TARGET onnxruntime4j_jni POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy_if_different $ ${JAVA_PACKAGE_JNI_DIR}/$) - if (onnxruntime_USE_CUDA OR onnxruntime_USE_DNNL OR onnxruntime_USE_OPENVINO OR onnxruntime_USE_TENSORRT) + if (onnxruntime_USE_CUDA OR onnxruntime_USE_DNNL OR onnxruntime_USE_OPENVINO OR onnxruntime_USE_TENSORRT OR (onnxruntime_USE_QNN AND NOT onnxruntime_BUILD_QNN_EP_STATIC_LIB)) add_custom_command(TARGET onnxruntime4j_jni POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy_if_different $ ${JAVA_PACKAGE_LIB_DIR}/$) endif() if (onnxruntime_USE_CUDA) @@ -163,11 +163,14 @@ if (WIN32) if (onnxruntime_USE_TENSORRT) add_custom_command(TARGET onnxruntime4j_jni POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy_if_different $ ${JAVA_PACKAGE_LIB_DIR}/$) endif() + if (onnxruntime_USE_QNN AND NOT onnxruntime_BUILD_QNN_EP_STATIC_LIB) + add_custom_command(TARGET onnxruntime4j_jni POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy_if_different $ ${JAVA_PACKAGE_LIB_DIR}/$) + endif() endif() else() add_custom_command(TARGET onnxruntime4j_jni POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy_if_different $ ${JAVA_PACKAGE_LIB_DIR}/$) add_custom_command(TARGET onnxruntime4j_jni POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy_if_different $ ${JAVA_PACKAGE_JNI_DIR}/$) - if (onnxruntime_USE_CUDA OR onnxruntime_USE_DNNL OR onnxruntime_USE_OPENVINO OR onnxruntime_USE_TENSORRT) + if (onnxruntime_USE_CUDA OR onnxruntime_USE_DNNL OR onnxruntime_USE_OPENVINO OR onnxruntime_USE_TENSORRT OR (onnxruntime_USE_QNN AND NOT onnxruntime_BUILD_QNN_EP_STATIC_LIB)) add_custom_command(TARGET onnxruntime4j_jni POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy_if_different $ ${JAVA_PACKAGE_LIB_DIR}/$) endif() if (onnxruntime_USE_CUDA) @@ -182,6 +185,9 @@ else() if (onnxruntime_USE_TENSORRT) 
add_custom_command(TARGET onnxruntime4j_jni POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy_if_different $ ${JAVA_PACKAGE_LIB_DIR}/$) endif() + if (onnxruntime_USE_QNN AND NOT onnxruntime_BUILD_QNN_EP_STATIC_LIB) + add_custom_command(TARGET onnxruntime4j_jni POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy_if_different $ ${JAVA_PACKAGE_LIB_DIR}/$) + endif() endif() # run the build process (this copies the results back into CMAKE_CURRENT_BINARY_DIR) diff --git a/cmake/onnxruntime_mlas.cmake b/cmake/onnxruntime_mlas.cmake index 5124262ec0004..ed3ad89247975 100644 --- a/cmake/onnxruntime_mlas.cmake +++ b/cmake/onnxruntime_mlas.cmake @@ -95,6 +95,8 @@ function(setup_mlas_source_for_windows) ${MLAS_SRC_DIR}/rotary_embedding_kernel_neon.h ${MLAS_SRC_DIR}/rotary_embedding_kernel_neon.cpp ${MLAS_SRC_DIR}/rotary_embedding_kernel_neon_fp16.cpp + ${MLAS_SRC_DIR}/hgemm_kernel_neon.cpp + ${MLAS_SRC_DIR}/halfgemm_kernel_neon_fp16.cpp ) set(mlas_platform_preprocess_srcs @@ -374,6 +376,7 @@ else() ${MLAS_SRC_DIR}/sqnbitgemm_kernel_neon_int8.cpp ${MLAS_SRC_DIR}/rotary_embedding_kernel_neon.h ${MLAS_SRC_DIR}/rotary_embedding_kernel_neon.cpp + ${MLAS_SRC_DIR}/hgemm_kernel_neon.cpp ) set_source_files_properties(${MLAS_SRC_DIR}/sqnbitgemm_kernel_neon_int8.cpp PROPERTIES COMPILE_FLAGS " -march=armv8.2-a+dotprod") @@ -394,6 +397,7 @@ else() ${MLAS_SRC_DIR}/cast_kernel_neon.cpp ${MLAS_SRC_DIR}/hqnbitgemm_kernel_neon_fp16.cpp ${MLAS_SRC_DIR}/rotary_embedding_kernel_neon_fp16.cpp + ${MLAS_SRC_DIR}/halfgemm_kernel_neon_fp16.cpp ) set_source_files_properties(${MLAS_SRC_DIR}/aarch64/HalfGemmKernelNeon.S PROPERTIES COMPILE_FLAGS " -march=armv8.2-a+fp16 ") set_source_files_properties(${MLAS_SRC_DIR}/aarch64/QgemmS8S8KernelSmmla.S PROPERTIES COMPILE_FLAGS " -march=armv8.2-a+i8mm ") @@ -406,6 +410,7 @@ else() set_source_files_properties(${MLAS_SRC_DIR}/cast_kernel_neon.cpp PROPERTIES COMPILE_FLAGS " -march=armv8.2-a+fp16 ") set_source_files_properties(${MLAS_SRC_DIR}/hqnbitgemm_kernel_neon_fp16.cpp PROPERTIES COMPILE_FLAGS " -march=armv8.2-a+fp16 ") set_source_files_properties(${MLAS_SRC_DIR}/rotary_embedding_kernel_neon_fp16.cpp PROPERTIES COMPILE_FLAGS " -march=armv8.2-a+fp16 ") + set_source_files_properties(${MLAS_SRC_DIR}/halfgemm_kernel_neon_fp16.cpp PROPERTIES COMPILE_FLAGS " -march=armv8.2-a+fp16 ") endif() if(ONNXRUNTIME_MLAS_MULTI_ARCH) diff --git a/cmake/onnxruntime_providers.cmake b/cmake/onnxruntime_providers.cmake index 582491de9503d..67fa48b28278d 100644 --- a/cmake/onnxruntime_providers.cmake +++ b/cmake/onnxruntime_providers.cmake @@ -74,9 +74,6 @@ endif() if(onnxruntime_USE_JSEP) set(PROVIDERS_JS onnxruntime_providers_js) endif() -if(onnxruntime_USE_QNN) - set(PROVIDERS_QNN onnxruntime_providers_qnn) -endif() if(onnxruntime_USE_RKNPU) set(PROVIDERS_RKNPU onnxruntime_providers_rknpu) endif() diff --git a/cmake/onnxruntime_providers_coreml.cmake b/cmake/onnxruntime_providers_coreml.cmake index 0aa25a221bf27..18048c8cdce2f 100644 --- a/cmake/onnxruntime_providers_coreml.cmake +++ b/cmake/onnxruntime_providers_coreml.cmake @@ -8,25 +8,18 @@ endif() add_compile_definitions(USE_COREML=1) # Check if we can build the coremltools code for creating an mlpackage with an mlprogram. -# The coremltools source requires std::filesystem::path which is only available from iOS 13 on. -set(_enable_ML_PROGRAM ON) -if (IOS AND CMAKE_OSX_DEPLOYMENT_TARGET VERSION_LESS 13.0) - message(WARNING "CoreML ML Program is not supported on iOS < 13.0. 
Excluding ML Program support from build.") - set(_enable_ML_PROGRAM OFF) -elseif(LINUX) - # uuid-dev is required. we don't bother installing on CIs as it's really for manual developer testing. +if(LINUX) find_library(LibUUID_LIBRARY NAMES uuid) find_path(LibUUID_INCLUDE_DIR NAMES uuid/uuid.h) if (NOT LibUUID_INCLUDE_DIR) - message(STATUS "uuid/uuid.h was not found as is required for ML Program support. " + message(FATAL "uuid/uuid.h was not found as is required for ML Program support. " "Run `sudo apt install uuid-dev` if you need to test ML Program related CoreML EP code. ") - set(_enable_ML_PROGRAM OFF) endif() endif() -if (_enable_ML_PROGRAM) - add_compile_definitions(COREML_ENABLE_MLPROGRAM=1) -endif() + +add_compile_definitions(COREML_ENABLE_MLPROGRAM=1) + # Compile CoreML proto definition to ${CMAKE_CURRENT_BINARY_DIR}/coreml_proto set(COREML_PROTO_ROOT ${coremltools_SOURCE_DIR}/mlmodel/format) @@ -93,10 +86,10 @@ file(GLOB_RECURSE "${ONNXRUNTIME_ROOT}/core/providers/coreml/builders/*.cc" ) -if(_enable_ML_PROGRAM) + # Add helpers to create mlpackage weights. limit to just the files we need to minimize the changes to make them # build on Windows and Linux. - file(GLOB +file(GLOB onnxruntime_providers_coreml_milblob_cc_srcs CONFIGURE_DEPENDS "${coremltools_SOURCE_DIR}/mlmodel/src/MILBlob/*.hpp" "${coremltools_SOURCE_DIR}/mlmodel/src/MILBlob/*.cpp" @@ -105,22 +98,22 @@ if(_enable_ML_PROGRAM) "${coremltools_SOURCE_DIR}/mlmodel/src/MILBlob/Blob/StorageFormat.hpp" "${coremltools_SOURCE_DIR}/mlmodel/src/MILBlob/Blob/FileWriter.?pp" "${coremltools_SOURCE_DIR}/mlmodel/src/MILBlob/Blob/StorageWriter.?pp" - ) +) - # Add helpers to create mlpackage - file(GLOB +# Add helpers to create mlpackage +file(GLOB onnxruntime_providers_coreml_modelpackage_cc_srcs CONFIGURE_DEPENDS "${coremltools_SOURCE_DIR}/modelpackage/src/ModelPackage.?pp" "${coremltools_SOURCE_DIR}/modelpackage/src/utils/JsonMap.?pp" - ) +) - set(coremltools_srcs +set(coremltools_srcs ${onnxruntime_providers_coreml_milblob_cc_srcs} ${onnxruntime_providers_coreml_modelpackage_cc_srcs} - ) +) + +source_group(TREE ${coremltools_SOURCE_DIR} PREFIX coremltools FILES ${coremltools_srcs}) - source_group(TREE ${coremltools_SOURCE_DIR} PREFIX coremltools FILES ${coremltools_srcs}) -endif() # Add CoreML objective c++ source code if (APPLE) @@ -174,34 +167,34 @@ if (APPLE) target_compile_definitions(onnxruntime_providers_coreml PRIVATE __APPLE__) endif() -if (_enable_ML_PROGRAM) - # Setup coremltools fp16 and json dependencies for creating an mlpackage. - # - # These are also used by external/xnnpack.cmake. fp16 depends on psimd - FetchContent_Declare(psimd URL ${DEP_URL_psimd} URL_HASH SHA1=${DEP_SHA1_psimd}) - onnxruntime_fetchcontent_makeavailable(psimd) - set(PSIMD_SOURCE_DIR ${psimd_SOURCE_DIR}) - FetchContent_Declare(fp16 URL ${DEP_URL_fp16} URL_HASH SHA1=${DEP_SHA1_fp16}) - set(FP16_BUILD_TESTS OFF CACHE INTERNAL "") - set(FP16_BUILD_BENCHMARKS OFF CACHE INTERNAL "") - onnxruntime_fetchcontent_makeavailable(fp16) - - # need to tweak the include paths to match what the coreml source code expects - target_include_directories(onnxruntime_providers_coreml PRIVATE - ${fp16_SOURCE_DIR}/include - ${nlohmann_json_SOURCE_DIR}/single_include/nlohmann - ${coremltools_SOURCE_DIR} - ${coremltools_SOURCE_DIR}/mlmodel/src/ - ${coremltools_SOURCE_DIR}/modelpackage/src/ - ) - add_dependencies(onnxruntime_providers_coreml nlohmann_json::nlohmann_json fp16) +# Setup coremltools fp16 and json dependencies for creating an mlpackage. 
+# +# fp16 depends on psimd +FetchContent_Declare(psimd URL ${DEP_URL_psimd} URL_HASH SHA1=${DEP_SHA1_psimd}) +onnxruntime_fetchcontent_makeavailable(psimd) +set(PSIMD_SOURCE_DIR ${psimd_SOURCE_DIR}) +FetchContent_Declare(fp16 URL ${DEP_URL_fp16} URL_HASH SHA1=${DEP_SHA1_fp16}) +set(FP16_BUILD_TESTS OFF CACHE INTERNAL "") +set(FP16_BUILD_BENCHMARKS OFF CACHE INTERNAL "") +onnxruntime_fetchcontent_makeavailable(fp16) + +# need to tweak the include paths to match what the coreml source code expects +target_include_directories(onnxruntime_providers_coreml PRIVATE + ${fp16_SOURCE_DIR}/include + ${nlohmann_json_SOURCE_DIR}/single_include/nlohmann + ${coremltools_SOURCE_DIR} + ${coremltools_SOURCE_DIR}/mlmodel/src/ + ${coremltools_SOURCE_DIR}/modelpackage/src/ +) - if (LINUX) - target_link_libraries(onnxruntime_providers_coreml PRIVATE uuid) - endif() +add_dependencies(onnxruntime_providers_coreml nlohmann_json::nlohmann_json fp16) + +if (LINUX) + target_link_libraries(onnxruntime_providers_coreml PRIVATE uuid) endif() + if (APPLE) target_link_libraries(onnxruntime_providers_coreml PRIVATE "-framework Foundation" "-framework CoreML") endif() diff --git a/cmake/onnxruntime_providers_cpu.cmake b/cmake/onnxruntime_providers_cpu.cmake index 91a2b13002ec9..4ae89a392278f 100644 --- a/cmake/onnxruntime_providers_cpu.cmake +++ b/cmake/onnxruntime_providers_cpu.cmake @@ -239,7 +239,9 @@ if (NOT onnxruntime_MINIMAL_BUILD AND NOT onnxruntime_EXTENDED_MINIMAL_BUILD set_property(TARGET onnxruntime_providers_shared APPEND_STRING PROPERTY LINK_FLAGS "-Xlinker -exported_symbols_list ${ONNXRUNTIME_ROOT}/core/providers/shared/exported_symbols.lst") elseif(UNIX) if(NOT ${CMAKE_SYSTEM_NAME} MATCHES "AIX") - set_property(TARGET onnxruntime_providers_shared APPEND_STRING PROPERTY LINK_FLAGS "-Xlinker --version-script=${ONNXRUNTIME_ROOT}/core/providers/shared/version_script.lds -Xlinker --gc-sections") + target_link_options(onnxruntime_providers_shared PRIVATE + "LINKER:--version-script=${ONNXRUNTIME_ROOT}/core/providers/shared/version_script.lds" + "LINKER:--gc-sections") endif() elseif(WIN32) set_property(TARGET onnxruntime_providers_shared APPEND_STRING PROPERTY LINK_FLAGS "-DEF:${ONNXRUNTIME_ROOT}/core/providers/shared/symbols.def") diff --git a/cmake/onnxruntime_providers_qnn.cmake b/cmake/onnxruntime_providers_qnn.cmake index b68d84c23bb32..303020145889b 100644 --- a/cmake/onnxruntime_providers_qnn.cmake +++ b/cmake/onnxruntime_providers_qnn.cmake @@ -3,41 +3,89 @@ add_compile_definitions(USE_QNN=1) - # These are shared utils, - # TODO, move to a separate lib when used by EPs other than QNN, NNAPI and CoreML - file(GLOB onnxruntime_providers_shared_utils_cc_srcs CONFIGURE_DEPENDS - "${ONNXRUNTIME_ROOT}/core/providers/shared/utils/utils.h" - "${ONNXRUNTIME_ROOT}/core/providers/shared/utils/utils.cc" - ) + if(onnxruntime_BUILD_QNN_EP_STATIC_LIB) + add_compile_definitions(BUILD_QNN_EP_STATIC_LIB=1) + endif() file(GLOB_RECURSE - onnxruntime_providers_qnn_ep_cc_srcs CONFIGURE_DEPENDS - "${ONNXRUNTIME_ROOT}/core/providers/qnn/*.h" - "${ONNXRUNTIME_ROOT}/core/providers/qnn/*.cc" + onnxruntime_providers_qnn_ep_srcs CONFIGURE_DEPENDS + "${ONNXRUNTIME_ROOT}/core/providers/qnn/*.h" + "${ONNXRUNTIME_ROOT}/core/providers/qnn/*.cc" ) - file(GLOB_RECURSE - onnxruntime_providers_qnn_builder_cc_srcs CONFIGURE_DEPENDS - "${ONNXRUNTIME_ROOT}/core/providers/qnn/builder/*.h" - "${ONNXRUNTIME_ROOT}/core/providers/qnn/builder/*.cc" - ) + if(onnxruntime_BUILD_QNN_EP_STATIC_LIB) + # + # Build QNN EP as a static library + # + 
set(onnxruntime_providers_qnn_srcs ${onnxruntime_providers_qnn_ep_srcs}) + source_group(TREE ${ONNXRUNTIME_ROOT}/core FILES ${onnxruntime_providers_qnn_srcs}) + onnxruntime_add_static_library(onnxruntime_providers_qnn ${onnxruntime_providers_qnn_srcs}) + onnxruntime_add_include_to_target(onnxruntime_providers_qnn onnxruntime_common onnxruntime_framework onnx + onnx_proto protobuf::libprotobuf-lite + flatbuffers::flatbuffers Boost::mp11) + add_dependencies(onnxruntime_providers_qnn onnx ${onnxruntime_EXTERNAL_DEPENDENCIES}) + set_target_properties(onnxruntime_providers_qnn PROPERTIES CXX_STANDARD_REQUIRED ON) + set_target_properties(onnxruntime_providers_qnn PROPERTIES FOLDER "ONNXRuntime") + target_include_directories(onnxruntime_providers_qnn PRIVATE ${ONNXRUNTIME_ROOT} + ${onnxruntime_QNN_HOME}/include/QNN + ${onnxruntime_QNN_HOME}/include) + set_target_properties(onnxruntime_providers_qnn PROPERTIES LINKER_LANGUAGE CXX) - set(onnxruntime_providers_qnn_cc_srcs - ${onnxruntime_providers_shared_utils_cc_srcs} - ${onnxruntime_providers_qnn_ep_cc_srcs} - ${onnxruntime_providers_qnn_builder_cc_srcs} - ) + # ignore the warning unknown-pragmas on "pragma region" + if(NOT MSVC) + target_compile_options(onnxruntime_providers_qnn PRIVATE "-Wno-unknown-pragmas") + endif() + else() + # + # Build QNN EP as a shared library + # + file(GLOB_RECURSE + onnxruntime_providers_qnn_shared_lib_srcs CONFIGURE_DEPENDS + "${ONNXRUNTIME_ROOT}/core/providers/shared_library/*.h" + "${ONNXRUNTIME_ROOT}/core/providers/shared_library/*.cc" + ) + set(onnxruntime_providers_qnn_srcs ${onnxruntime_providers_qnn_ep_srcs} + ${onnxruntime_providers_qnn_shared_lib_srcs}) + + source_group(TREE ${ONNXRUNTIME_ROOT}/core FILES ${onnxruntime_providers_qnn_srcs}) + onnxruntime_add_shared_library_module(onnxruntime_providers_qnn ${onnxruntime_providers_qnn_srcs}) + onnxruntime_add_include_to_target(onnxruntime_providers_qnn ${ONNXRUNTIME_PROVIDERS_SHARED} ${GSL_TARGET} onnx + onnxruntime_common Boost::mp11 safeint_interface) + target_link_libraries(onnxruntime_providers_qnn PRIVATE ${ONNXRUNTIME_PROVIDERS_SHARED} ${ABSEIL_LIBS} ${CMAKE_DL_LIBS}) + add_dependencies(onnxruntime_providers_qnn onnxruntime_providers_shared ${onnxruntime_EXTERNAL_DEPENDENCIES}) + target_include_directories(onnxruntime_providers_qnn PRIVATE ${ONNXRUNTIME_ROOT} + ${CMAKE_CURRENT_BINARY_DIR} + ${onnxruntime_QNN_HOME}/include/QNN + ${onnxruntime_QNN_HOME}/include) + + # Set linker flags for function(s) exported by EP DLL + if(UNIX) + target_link_options(onnxruntime_providers_qnn PRIVATE + "LINKER:--version-script=${ONNXRUNTIME_ROOT}/core/providers/qnn/version_script.lds" + "LINKER:--gc-sections" + "LINKER:-rpath=\$ORIGIN" + ) + elseif(WIN32) + set_property(TARGET onnxruntime_providers_qnn APPEND_STRING PROPERTY LINK_FLAGS + "-DEF:${ONNXRUNTIME_ROOT}/core/providers/qnn/symbols.def") + else() + message(FATAL_ERROR "onnxruntime_providers_qnn unknown platform, need to specify shared library exports for it") + endif() + + # Set compile options + if(MSVC) + target_compile_options(onnxruntime_providers_qnn PUBLIC /wd4099 /wd4005) + else() + # ignore the warning unknown-pragmas on "pragma region" + target_compile_options(onnxruntime_providers_qnn PRIVATE "-Wno-unknown-pragmas") + endif() + + set_target_properties(onnxruntime_providers_qnn PROPERTIES LINKER_LANGUAGE CXX) + set_target_properties(onnxruntime_providers_qnn PROPERTIES CXX_STANDARD_REQUIRED ON) + set_target_properties(onnxruntime_providers_qnn PROPERTIES FOLDER "ONNXRuntime") - source_group(TREE 
${ONNXRUNTIME_ROOT}/core FILES ${onnxruntime_providers_qnn_cc_srcs}) - onnxruntime_add_static_library(onnxruntime_providers_qnn ${onnxruntime_providers_qnn_cc_srcs}) - onnxruntime_add_include_to_target(onnxruntime_providers_qnn onnxruntime_common onnxruntime_framework onnx onnx_proto protobuf::libprotobuf-lite flatbuffers::flatbuffers Boost::mp11) - target_link_libraries(onnxruntime_providers_qnn) - add_dependencies(onnxruntime_providers_qnn onnx ${onnxruntime_EXTERNAL_DEPENDENCIES}) - set_target_properties(onnxruntime_providers_qnn PROPERTIES CXX_STANDARD_REQUIRED ON) - set_target_properties(onnxruntime_providers_qnn PROPERTIES FOLDER "ONNXRuntime") - target_include_directories(onnxruntime_providers_qnn PRIVATE ${ONNXRUNTIME_ROOT} ${onnxruntime_QNN_HOME}/include/QNN ${onnxruntime_QNN_HOME}/include) - set_target_properties(onnxruntime_providers_qnn PROPERTIES LINKER_LANGUAGE CXX) - # ignore the warning unknown-pragmas on "pragma region" - if(NOT MSVC) - target_compile_options(onnxruntime_providers_qnn PRIVATE "-Wno-unknown-pragmas") + install(TARGETS onnxruntime_providers_qnn + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}) endif() diff --git a/cmake/onnxruntime_providers_webgpu.cmake b/cmake/onnxruntime_providers_webgpu.cmake index e527d538d8757..4bbca7b1b811a 100644 --- a/cmake/onnxruntime_providers_webgpu.cmake +++ b/cmake/onnxruntime_providers_webgpu.cmake @@ -21,44 +21,72 @@ source_group(TREE ${REPO_ROOT} FILES ${onnxruntime_providers_webgpu_cc_srcs}) onnxruntime_add_static_library(onnxruntime_providers_webgpu ${onnxruntime_providers_webgpu_cc_srcs}) onnxruntime_add_include_to_target(onnxruntime_providers_webgpu - onnxruntime_common dawn::dawncpp_headers dawn::dawn_headers onnx onnx_proto flatbuffers::flatbuffers Boost::mp11 safeint_interface) + onnxruntime_common onnx onnx_proto flatbuffers::flatbuffers Boost::mp11 safeint_interface) - set(onnxruntime_providers_webgpu_dll_deps) - - if (onnxruntime_BUILD_DAWN_MONOLITHIC_LIBRARY) - target_link_libraries(onnxruntime_providers_webgpu dawn::webgpu_dawn) + if (CMAKE_SYSTEM_NAME STREQUAL "Emscripten") + # target "emdawnwebgpu_c" is created by Dawn, including "-fno-exceptions" in its compile options by default. + # + # in ONNX Runtime build, "-s DISABLE_EXCEPTION_CATCHING=0" is appended to CMAKE_CXX_FLAGS by default unless build flag + # "--disable_wasm_exception_catching" is specified. It is not compatible with "-fno-exceptions". + # + # if "-s DISABLE_EXCEPTION_CATCHING=0" is set, we need to remove "-fno-exceptions" from emdawnwebgpu_c + if (CMAKE_CXX_FLAGS MATCHES "DISABLE_EXCEPTION_CATCHING=0") + get_property(EM_DAWN_WEBGPU_C_COMPILE_OPTIONS TARGET emdawnwebgpu_c PROPERTY COMPILE_OPTIONS) + list(REMOVE_ITEM EM_DAWN_WEBGPU_C_COMPILE_OPTIONS "-fno-exceptions") + set_property(TARGET emdawnwebgpu_c PROPERTY COMPILE_OPTIONS ${EM_DAWN_WEBGPU_C_COMPILE_OPTIONS}) + endif() - if (WIN32) - if (onnxruntime_ENABLE_DELAY_LOADING_WIN_DLLS) - list(APPEND onnxruntime_DELAYLOAD_FLAGS "/DELAYLOAD:webgpu_dawn.dll") - endif() + # target "emdawnwebgpu_cpp" is created by Dawn. When it is linked to onnxruntime_providers_webgpu as "PUBLIC" + # dependency, a few build/link flags will be set automatically to make sure emscripten can generate correct + # WebAssembly/JavaScript code for WebGPU support. 
+ target_link_libraries(onnxruntime_providers_webgpu PUBLIC emdawnwebgpu_cpp) - list(APPEND onnxruntime_providers_webgpu_dll_deps "$") - endif() + # ASYNCIFY is required for WGPUFuture support (i.e. async functions in the WebGPU API) + target_link_options(onnxruntime_providers_webgpu PUBLIC + "SHELL:-s ASYNCIFY=1" + "SHELL:-s ASYNCIFY_STACK_SIZE=65536" + ) else() - if (NOT onnxruntime_USE_EXTERNAL_DAWN) - target_link_libraries(onnxruntime_providers_webgpu dawn::dawn_native) + onnxruntime_add_include_to_target(onnxruntime_providers_webgpu dawn::dawncpp_headers dawn::dawn_headers) + + set(onnxruntime_providers_webgpu_dll_deps) + + if (onnxruntime_BUILD_DAWN_MONOLITHIC_LIBRARY) + target_link_libraries(onnxruntime_providers_webgpu dawn::webgpu_dawn) + + if (WIN32) + if (onnxruntime_ENABLE_DELAY_LOADING_WIN_DLLS) + list(APPEND onnxruntime_DELAYLOAD_FLAGS "/DELAYLOAD:webgpu_dawn.dll") + endif() + + list(APPEND onnxruntime_providers_webgpu_dll_deps "$") + endif() + else() + if (NOT onnxruntime_USE_EXTERNAL_DAWN) + target_link_libraries(onnxruntime_providers_webgpu dawn::dawn_native) + endif() + target_link_libraries(onnxruntime_providers_webgpu dawn::dawn_proc) endif() - target_link_libraries(onnxruntime_providers_webgpu dawn::dawn_proc) - endif() - if (WIN32 AND onnxruntime_ENABLE_DAWN_BACKEND_D3D12) - # Ensure dxil.dll and dxcompiler.dll exist in the output directory $ - add_dependencies(onnxruntime_providers_webgpu copy_dxil_dll) - add_dependencies(onnxruntime_providers_webgpu dxcompiler) + if (WIN32 AND onnxruntime_ENABLE_DAWN_BACKEND_D3D12) + # Ensure dxil.dll and dxcompiler.dll exist in the output directory $ + add_dependencies(onnxruntime_providers_webgpu copy_dxil_dll) + add_dependencies(onnxruntime_providers_webgpu dxcompiler) - list(APPEND onnxruntime_providers_webgpu_dll_deps "$/dxil.dll") - list(APPEND onnxruntime_providers_webgpu_dll_deps "$/dxcompiler.dll") - endif() + list(APPEND onnxruntime_providers_webgpu_dll_deps "$/dxil.dll") + list(APPEND onnxruntime_providers_webgpu_dll_deps "$/dxcompiler.dll") + endif() - if (onnxruntime_providers_webgpu_dll_deps) - # Copy dependency DLLs to the output directory - add_custom_command( - TARGET onnxruntime_providers_webgpu - POST_BUILD - COMMAND ${CMAKE_COMMAND} -E copy_if_different "${onnxruntime_providers_webgpu_dll_deps}" "$" - COMMAND_EXPAND_LISTS - VERBATIM ) + if (onnxruntime_providers_webgpu_dll_deps) + # Copy dependency DLLs to the output directory + add_custom_command( + TARGET onnxruntime_providers_webgpu + POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy_if_different "${onnxruntime_providers_webgpu_dll_deps}" "$" + COMMAND_EXPAND_LISTS + VERBATIM ) + endif() endif() + add_dependencies(onnxruntime_providers_webgpu ${onnxruntime_EXTERNAL_DEPENDENCIES}) set_target_properties(onnxruntime_providers_webgpu PROPERTIES FOLDER "ONNXRuntime") diff --git a/cmake/onnxruntime_python.cmake b/cmake/onnxruntime_python.cmake index 5b29d1093aa5c..15a2862cede0c 100644 --- a/cmake/onnxruntime_python.cmake +++ b/cmake/onnxruntime_python.cmake @@ -169,9 +169,7 @@ if (onnxruntime_ENABLE_LAZY_TENSOR) endif() endif() -target_link_libraries(onnxruntime_pybind11_state PRIVATE - onnxruntime_session - ${onnxruntime_libs} +set(onnxruntime_pybind11_state_static_providers ${PROVIDERS_NNAPI} ${PROVIDERS_VSINPU} ${PROVIDERS_XNNPACK} @@ -183,7 +181,16 @@ target_link_libraries(onnxruntime_pybind11_state PRIVATE ${PROVIDERS_XNNPACK} ${PROVIDERS_WEBGPU} ${PROVIDERS_AZURE} - ${PROVIDERS_QNN} +) + +if(onnxruntime_BUILD_QNN_EP_STATIC_LIB) + list(APPEND
onnxruntime_pybind11_state_static_providers PRIVATE onnxruntime_providers_qnn) +endif() + +target_link_libraries(onnxruntime_pybind11_state PRIVATE + onnxruntime_session + ${onnxruntime_libs} + ${onnxruntime_pybind11_state_static_providers} onnxruntime_optimizer onnxruntime_providers onnxruntime_util @@ -1000,6 +1007,16 @@ if (onnxruntime_USE_COREML) endif() if (onnxruntime_USE_QNN) + if(NOT onnxruntime_BUILD_QNN_EP_STATIC_LIB) + add_custom_command( + TARGET onnxruntime_pybind11_state POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy + $ + $ + $/onnxruntime/capi/ + ) + endif() + add_custom_command( TARGET onnxruntime_pybind11_state POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy diff --git a/cmake/onnxruntime_unittests.cmake b/cmake/onnxruntime_unittests.cmake index 9e3ab4d41f416..c727f4b7e381b 100644 --- a/cmake/onnxruntime_unittests.cmake +++ b/cmake/onnxruntime_unittests.cmake @@ -221,19 +221,11 @@ function(AddTest) ) else() set(TEST_NODE_FLAGS) - if (onnxruntime_ENABLE_WEBASSEMBLY_THREADS) - list(APPEND TEST_NODE_FLAGS "--experimental-wasm-threads") - endif() - if (onnxruntime_ENABLE_WEBASSEMBLY_SIMD) - list(APPEND TEST_NODE_FLAGS "--experimental-wasm-simd") - endif() # prefer Node from emsdk so the version is more deterministic if (DEFINED ENV{EMSDK_NODE}) set(NODE_EXECUTABLE $ENV{EMSDK_NODE}) else() - # warning as we don't know what node version is being used and whether things like the TEST_NODE_FLAGS - # will be valid. e.g. "--experimental-wasm-simd" is not valid with node v20 or later. message(WARNING "EMSDK_NODE environment variable was not set. Falling back to system `node`.") set(NODE_EXECUTABLE node) endif() @@ -627,16 +619,13 @@ if(onnxruntime_USE_ARMNN) list(APPEND onnxruntime_test_providers_dependencies onnxruntime_providers_armnn) endif() -set(ONNXRUNTIME_TEST_LIBS - onnxruntime_session - ${ONNXRUNTIME_INTEROP_TEST_LIBS} - ${onnxruntime_libs} - # CUDA, ROCM, TENSORRT, MIGRAPHX, DNNL, and OpenVINO are dynamically loaded at runtime +set(ONNXRUNTIME_TEST_STATIC_PROVIDER_LIBS + # CUDA, ROCM, TENSORRT, MIGRAPHX, DNNL, and OpenVINO are dynamically loaded at runtime. + # QNN EP can be built as either a dynamic or a static library.
${PROVIDERS_NNAPI} ${PROVIDERS_VSINPU} ${PROVIDERS_JS} ${PROVIDERS_WEBGPU} - ${PROVIDERS_QNN} ${PROVIDERS_SNPE} ${PROVIDERS_RKNPU} ${PROVIDERS_DML} @@ -645,6 +634,17 @@ set(ONNXRUNTIME_TEST_LIBS ${PROVIDERS_COREML} ${PROVIDERS_XNNPACK} ${PROVIDERS_AZURE} +) + +if (onnxruntime_BUILD_QNN_EP_STATIC_LIB) + list(APPEND ONNXRUNTIME_TEST_STATIC_PROVIDER_LIBS onnxruntime_providers_qnn) +endif() + +set(ONNXRUNTIME_TEST_LIBS + onnxruntime_session + ${ONNXRUNTIME_INTEROP_TEST_LIBS} + ${onnxruntime_libs} + ${ONNXRUNTIME_TEST_STATIC_PROVIDER_LIBS} onnxruntime_optimizer onnxruntime_providers onnxruntime_util @@ -708,7 +708,9 @@ if(onnxruntime_USE_QNN AND NOT onnxruntime_MINIMAL_BUILD AND NOT onnxruntime_RED list(APPEND onnxruntime_test_framework_src_patterns ${TEST_SRC_DIR}/providers/qnn/*) list(APPEND onnxruntime_test_framework_libs onnxruntime_providers_qnn) list(APPEND onnxruntime_test_providers_dependencies onnxruntime_providers_qnn) - list(APPEND onnxruntime_test_providers_libs onnxruntime_providers_qnn) + if(NOT onnxruntime_BUILD_QNN_EP_STATIC_LIB) + list(APPEND onnxruntime_test_providers_dependencies onnxruntime_providers_shared) + endif() endif() if(onnxruntime_USE_SNPE) diff --git a/cmake/onnxruntime_webassembly.cmake b/cmake/onnxruntime_webassembly.cmake index 66268cefac9ef..6cd05d1ad024b 100644 --- a/cmake/onnxruntime_webassembly.cmake +++ b/cmake/onnxruntime_webassembly.cmake @@ -111,6 +111,7 @@ if (onnxruntime_BUILD_WEBASSEMBLY_STATIC_LIB) ${PROVIDERS_JS} ${PROVIDERS_XNNPACK} ${PROVIDERS_WEBNN} + ${PROVIDERS_WEBGPU} onnxruntime_session onnxruntime_util re2::re2 @@ -188,6 +189,7 @@ else() ${PROVIDERS_JS} ${PROVIDERS_XNNPACK} ${PROVIDERS_WEBNN} + ${PROVIDERS_WEBGPU} onnxruntime_session onnxruntime_util re2::re2 @@ -380,10 +382,19 @@ jsepDownload:_pp_") "SHELL:--pre-js \"${ONNXRUNTIME_ROOT}/wasm/pre-jsep.js\"" "SHELL:-s ASYNCIFY=1" "SHELL:-s ASYNCIFY_STACK_SIZE=65536" - "SHELL:-s ASYNCIFY_EXPORTS=['OrtRun']" - "SHELL:-s ASYNCIFY_IMPORTS=['Module.jsepCopy','Module.jsepCopyAsync','jsepDownload']" ) set_target_properties(onnxruntime_webassembly PROPERTIES LINK_DEPENDS ${ONNXRUNTIME_ROOT}/wasm/pre-jsep.js) + + if (onnxruntime_ENABLE_WEBASSEMBLY_MEMORY64) + target_link_options(onnxruntime_webassembly PRIVATE + "SHELL:-s ASYNCIFY_EXPORTS=['OrtRun']" + "SHELL:-s ASYNCIFY_IMPORTS=['Module.jsepCopy','Module.jsepCopyAsync','jsepDownload']" + ) + endif() + endif() + + if (onnxruntime_USE_WEBGPU) + target_compile_definitions(onnxruntime_webassembly PRIVATE USE_WEBGPU=1) endif() if (onnxruntime_EMSCRIPTEN_SETTINGS) diff --git a/cmake/patches/coremltools/crossplatformbuild.patch b/cmake/patches/coremltools/crossplatformbuild.patch index 7f2268f50c82e..832191b366d4d 100644 --- a/cmake/patches/coremltools/crossplatformbuild.patch +++ b/cmake/patches/coremltools/crossplatformbuild.patch @@ -3,7 +3,7 @@ index adc7bfcf..7b2bf9cc 100644 --- a/mlmodel/src/MILBlob/Blob/FileWriter.cpp +++ b/mlmodel/src/MILBlob/Blob/FileWriter.cpp @@ -8,8 +8,12 @@ - + #include #include + @@ -12,17 +12,31 @@ index adc7bfcf..7b2bf9cc 100644 #include #include +#endif - + using namespace MILBlob; using namespace MILBlob::Blob; +diff --git a/mlmodel/src/MILBlob/Blob/FileWriter.hpp b/mlmodel/src/MILBlob/Blob/FileWriter.hpp +index 2bc99403..49239513 100644 +--- a/mlmodel/src/MILBlob/Blob/FileWriter.hpp ++++ b/mlmodel/src/MILBlob/Blob/FileWriter.hpp +@@ -6,7 +6,8 @@ + #pragma once + + #include "MILBlob/Util/Span.hpp" +- ++// ORT_EDIT: add missing header ++#include + #include + #include + #include diff --git a/mlmodel/src/MILBlob/Fp16.cpp 
b/mlmodel/src/MILBlob/Fp16.cpp index ae1e71a1..77a7161f 100644 --- a/mlmodel/src/MILBlob/Fp16.cpp +++ b/mlmodel/src/MILBlob/Fp16.cpp @@ -5,6 +5,8 @@ - + #include "MILBlob/Fp16.hpp" - + +// ORT_EDIT: Exclude clang specific pragmas from other builds +#if defined(__clang__) // fp16 lib code has some conversion warnings we don't want to globally ignore @@ -35,11 +49,11 @@ index ae1e71a1..77a7161f 100644 +#else +#include "fp16/fp16.h" +#endif - + using namespace MILBlob; - + diff --git a/modelpackage/src/ModelPackage.cpp b/modelpackage/src/ModelPackage.cpp -index 8fee56b9..99e0d8d6 100644 +index 8fee56b9..5508e316 100644 --- a/modelpackage/src/ModelPackage.cpp +++ b/modelpackage/src/ModelPackage.cpp @@ -26,7 +26,14 @@ namespace std { @@ -55,22 +69,22 @@ index 8fee56b9..99e0d8d6 100644 #include +#endif #include - + #if defined(__cplusplus) @@ -187,7 +194,10 @@ public: ModelPackageItemInfo createFile(const std::string& name, const std::string& author, const std::string& description); }; - + +// ORT_EDIT: pragma only available on APPLE platforms +#if defined(__APPLE__) #pragma mark ModelPackageImpl +#endif - + ModelPackageImpl::ModelPackageImpl(const std::filesystem::path& path, bool createIfNecessary, bool readOnly) : m_packagePath(path), @@ -372,6 +382,20 @@ std::filesystem::path ModelPackageImpl::getItemPath(const std::string& name, con } - + std::string ModelPackageImpl::generateIdentifier() const { +// ORT_EDIT: Use built-in UUID generation on Windows +#if defined(_WIN32) @@ -87,20 +101,20 @@ index 8fee56b9..99e0d8d6 100644 + return uuidStrCpp; +#else uuid_t uuid; - + // uuid_unparse generates a 36-character null-terminated string (37 bytes). @@ -383,6 +407,7 @@ std::string ModelPackageImpl::generateIdentifier() const { uuid_unparse(uuid, buf); - + return std::string(buf); +#endif } - + ModelPackageItemInfo ModelPackageImpl::createFile(const std::string& name, const std::string& author, const std::string& description) { -@@ -468,7 +493,13 @@ std::shared_ptr ModelPackageImpl::findItem(const std::stri +@@ -468,7 +493,14 @@ std::shared_ptr ModelPackageImpl::findItem(const std::stri auto author = itemInfoEntry->getString(kModelPackageItemInfoAuthorKey); auto description = itemInfoEntry->getString(kModelPackageItemInfoDescriptionKey); - + +// ORT_EDIT: need to use path.string() on Windows +#if defined(_WIN32) + return std::make_shared(std::make_shared(identifier, path.string(), name, author, description)); @@ -108,12 +122,13 @@ index 8fee56b9..99e0d8d6 100644 +#else return std::make_shared(std::make_shared(identifier, path, name, author, description)); +#endif ++ } - + std::shared_ptr ModelPackageImpl::findItem(const std::string& name, const std::string& author) const -@@ -514,7 +545,9 @@ void ModelPackageImpl::removeItem(const std::string& identifier) +@@ -514,7 +546,9 @@ void ModelPackageImpl::removeItem(const std::string& identifier) } - + auto path = m_packageDataDirPath / itemInfoEntry->getString(kModelPackageItemInfoPathKey); - if (0 != std::remove(path.c_str())) { + // ORT_EDIT: std::remove doesn't work on Windows. Use std::filesystem::remove instead. 
@@ -121,8 +136,8 @@ index 8fee56b9..99e0d8d6 100644 + if (!std::filesystem::remove(path)) { throw std::runtime_error("Failed to remove file at path: " + path.string()); } - -@@ -525,13 +558,16 @@ bool ModelPackageImpl::isValid(const std::filesystem::path& path) + +@@ -525,13 +559,16 @@ bool ModelPackageImpl::isValid(const std::filesystem::path& path) { try { ModelPackageImpl(path, false, true); @@ -132,16 +147,16 @@ index 8fee56b9..99e0d8d6 100644 } return true; } - + +// ORT_EDIT: pragma only available on APPLE platforms +#if defined(__APPLE__) #pragma mark ModelPackage +#endif - + ModelPackage::ModelPackage(const std::string& packagePath, bool createIfNecessary, bool readOnly) : m_modelPackageImpl(std::make_shared(packagePath, createIfNecessary, readOnly)) -@@ -544,7 +580,12 @@ ModelPackage::~ModelPackage() - +@@ -544,7 +581,12 @@ ModelPackage::~ModelPackage() + std::string ModelPackage::path() const { +// ORT_EDIT: Windows doesn't automatically convert to std::string as the native format could be char or wchar. @@ -151,5 +166,19 @@ index 8fee56b9..99e0d8d6 100644 return m_modelPackageImpl->path(); +#endif } - + std::string ModelPackage::setRootModel(const std::string& path, const std::string& name, const std::string& author, const std::string& description) +diff --git a/modelpackage/src/utils/JsonMap.hpp b/modelpackage/src/utils/JsonMap.hpp +index 0d7dc3f4..b700cfd5 100644 +--- a/modelpackage/src/utils/JsonMap.hpp ++++ b/modelpackage/src/utils/JsonMap.hpp +@@ -10,7 +10,8 @@ + #include + #include + #include +- ++// ORT_EDIT: add missing header ++#include + class JsonMapImpl; + + class JsonMap { diff --git a/cmake/patches/emscripten/patch_3.1.74.tgz b/cmake/patches/emscripten/patch_3.1.74.tgz new file mode 100644 index 0000000000000..cfb52de638653 Binary files /dev/null and b/cmake/patches/emscripten/patch_3.1.74.tgz differ diff --git a/cmake/patches/xnnpack/AddEmscriptenAndIosSupport.patch b/cmake/patches/xnnpack/AddEmscriptenAndIosSupport.patch index 3abf2d3afec42..c9cb4bcad9e20 100644 --- a/cmake/patches/xnnpack/AddEmscriptenAndIosSupport.patch +++ b/cmake/patches/xnnpack/AddEmscriptenAndIosSupport.patch @@ -1,8 +1,8 @@ diff --git a/CMakeLists.txt b/CMakeLists.txt -index 1ff85b538..c3ef2183f 100644 +index f0b3410ae..1e3cb8178 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt -@@ -253,7 +253,7 @@ ENDIF() +@@ -337,7 +337,7 @@ ENDIF() # ---[ Build flags IF(NOT CMAKE_SYSTEM_NAME) MESSAGE(FATAL_ERROR "CMAKE_SYSTEM_NAME not defined") @@ -11,21 +11,21 @@ index 1ff85b538..c3ef2183f 100644 MESSAGE(FATAL_ERROR "Unrecognized CMAKE_SYSTEM_NAME value \"${CMAKE_SYSTEM_NAME}\"") ENDIF() IF(CMAKE_SYSTEM_NAME MATCHES "Windows") -@@ -763,7 +763,12 @@ IF(XNNPACK_BUILD_LIBRARY) - TARGET_LINK_LIBRARIES(operator-run PRIVATE xnnpack-base logging) +@@ -848,7 +848,12 @@ IF(XNNPACK_BUILD_LIBRARY) TARGET_LINK_LIBRARIES(operator-utils PRIVATE xnnpack-base logging) - TARGET_LINK_LIBRARIES(subgraph PRIVATE xnnpack-base allocator logging memory mutex operators operator-run) -- TARGET_LINK_LIBRARIES(XNNPACK PRIVATE allocator cache hardware-config indirection logging memory microkernel-utils microparams-init mutex normalization operators operator-run operator-utils packing microkernels-prod subgraph) + TARGET_LINK_LIBRARIES(reference-ukernels PRIVATE xnnpack-base) + TARGET_LINK_LIBRARIES(subgraph PRIVATE xnnpack-base allocator logging memory mutex operators operator-run datatype) +- TARGET_LINK_LIBRARIES(XNNPACK PRIVATE xnnpack-base allocator cache hardware-config indirection memory microkernel-utils microparams-init 
mutex normalization operators operator-run operator-utils packing microkernels-prod subgraph datatype reference-ukernels) + IF(CMAKE_SYSTEM_NAME STREQUAL "Emscripten") -+ # omit microkernels-prod as the list is manually created by ORT in cmake/external/xnnpack.cmake -+ TARGET_LINK_LIBRARIES(XNNPACK PRIVATE allocator cache hardware-config indirection logging memory microkernel-utils microparams-init mutex normalization operators operator-run operator-utils packing subgraph) ++ # omit microkernels-prod as the list is manually created by ORT in cmake/external/xnnpack.cmake ++ TARGET_LINK_LIBRARIES(XNNPACK PRIVATE xnnpack-base allocator cache hardware-config indirection memory microkernel-utils microparams-init mutex normalization operators operator-run operator-utils packing subgraph datatype reference-ukernels) + ELSE() -+ TARGET_LINK_LIBRARIES(XNNPACK PRIVATE allocator cache hardware-config indirection logging memory microkernel-utils microparams-init mutex normalization operators operator-run operator-utils packing microkernels-prod subgraph) -+ ENDIF() - TARGET_LINK_LIBRARIES(XNNPACK PUBLIC xnnpack-base) ++ TARGET_LINK_LIBRARIES(XNNPACK PRIVATE xnnpack-base allocator cache hardware-config indirection memory microkernel-utils microparams-init mutex normalization operators operator-run operator-utils packing microkernels-prod subgraph datatype reference-ukernels) ++ ENDIF() + TARGET_LINK_LIBRARIES(XNNPACK PUBLIC pthreadpool logging) SET_TARGET_PROPERTIES(XNNPACK PROPERTIES C_EXTENSIONS YES) ENDIF() -@@ -772,7 +777,8 @@ IF(NOT MSVC) +@@ -857,7 +862,8 @@ IF(NOT MSVC) ENDIF() IF(XNNPACK_TARGET_PROCESSOR STREQUAL "arm") SET_PROPERTY(SOURCE ${ALL_MICROKERNEL_SRCS} APPEND_STRING PROPERTY COMPILE_FLAGS " -marm ") diff --git a/cmake/vcpkg-ports/cpuinfo/portfile.cmake b/cmake/vcpkg-ports/cpuinfo/portfile.cmake new file mode 100644 index 0000000000000..e61308bf643b4 --- /dev/null +++ b/cmake/vcpkg-ports/cpuinfo/portfile.cmake @@ -0,0 +1,63 @@ +# On Windows, we can get a cpuinfo.dll, but it exports no symbols. 
+if(VCPKG_TARGET_IS_WINDOWS) + vcpkg_check_linkage(ONLY_STATIC_LIBRARY) +endif() + +vcpkg_from_github( + OUT_SOURCE_PATH SOURCE_PATH + REPO pytorch/cpuinfo + REF 8a1772a0c5c447df2d18edf33ec4603a8c9c04a6 + SHA512 b94ccbfa886221d6bb16513d074675af0a72928a9dd9485dcacdc1124a8a60aacbbe91913a1579e766dfb024f0be1d52eeead40342004ff0238a8b94a095ed08 + HEAD_REF master +) + +vcpkg_check_features(OUT_FEATURE_OPTIONS FEATURE_OPTIONS + FEATURES + tools CPUINFO_BUILD_TOOLS +) + +set(LINK_OPTIONS "") +if(VCPKG_LIBRARY_LINKAGE STREQUAL "dynamic") + list(APPEND LINK_OPTIONS -DCPUINFO_LIBRARY_TYPE=shared) +else() + list(APPEND LINK_OPTIONS -DCPUINFO_LIBRARY_TYPE=static) +endif() + +if(VCPKG_CRT_LINKAGE STREQUAL "dynamic") + list(APPEND LINK_OPTIONS -DCPUINFO_RUNTIME_TYPE=shared) +else() + list(APPEND LINK_OPTIONS -DCPUINFO_RUNTIME_TYPE=static) +endif() + +vcpkg_cmake_configure( + SOURCE_PATH "${SOURCE_PATH}" + OPTIONS + ${FEATURE_OPTIONS} + ${LINK_OPTIONS} + -DCPUINFO_BUILD_UNIT_TESTS=OFF + -DCPUINFO_BUILD_MOCK_TESTS=OFF + -DCPUINFO_BUILD_BENCHMARKS=OFF + OPTIONS_DEBUG + -DCPUINFO_LOG_LEVEL=debug + OPTIONS_RELEASE + -DCPUINFO_LOG_LEVEL=default +) +vcpkg_cmake_install() +vcpkg_cmake_config_fixup() +vcpkg_copy_pdbs() +vcpkg_fixup_pkgconfig() # pkg_check_modules(libcpuinfo) + +file(REMOVE_RECURSE "${CURRENT_PACKAGES_DIR}/debug/include") + +if("tools" IN_LIST FEATURES) + set(additional_tools "") + if(EXISTS "${CURRENT_PACKAGES_DIR}/bin/cpuid-dump${VCPKG_TARGET_EXECUTABLE_SUFFIX}") + list(APPEND additional_tools "cpuid-dump") + endif() + vcpkg_copy_tools( + TOOL_NAMES cache-info cpu-info isa-info ${additional_tools} + AUTO_CLEAN + ) +endif() + +file(INSTALL "${SOURCE_PATH}/LICENSE" DESTINATION "${CURRENT_PACKAGES_DIR}/share/${PORT}" RENAME copyright) diff --git a/cmake/vcpkg-ports/cpuinfo/vcpkg.json b/cmake/vcpkg-ports/cpuinfo/vcpkg.json new file mode 100644 index 0000000000000..ce93591dba5ac --- /dev/null +++ b/cmake/vcpkg-ports/cpuinfo/vcpkg.json @@ -0,0 +1,25 @@ +{ + "name": "cpuinfo", + "version-date": "2024-12-09", + "port-version": 3, + "description": "CPU INFOrmation library (x86/x86-64/ARM/ARM64, Linux/Windows/Android/macOS/iOS)", + "homepage": "https://github.com/pytorch/cpuinfo", + "license": "BSD-2-Clause", + "supports": "!(uwp & arm32)", + "dependencies": [ + { + "name": "vcpkg-cmake", + "host": true + }, + { + "name": "vcpkg-cmake-config", + "host": true + } + ], + "features": { + "tools": { + "description": "Build cpuinfo command-line tools", + "supports": "!uwp" + } + } +} diff --git a/cmake/vcpkg-ports/onnx/fix-cmakelists.patch b/cmake/vcpkg-ports/onnx/fix-cmakelists.patch new file mode 100644 index 0000000000000..f8d300103ab20 --- /dev/null +++ b/cmake/vcpkg-ports/onnx/fix-cmakelists.patch @@ -0,0 +1,67 @@ +diff --git a/CMakeLists.txt b/CMakeLists.txt +index 4dd56b6..2ff3e29 100644 +--- a/CMakeLists.txt ++++ b/CMakeLists.txt +@@ -65,6 +65,27 @@ endif() + + include(GNUInstallDirs) + ++# install protobuf files ++install(FILES ${CMAKE_CURRENT_BINARY_DIR}/onnx/onnx-data.proto ++ ${CMAKE_CURRENT_BINARY_DIR}/onnx/onnx-data.proto3 ++ ${CMAKE_CURRENT_BINARY_DIR}/onnx/onnx-ml.proto ++ ${CMAKE_CURRENT_BINARY_DIR}/onnx/onnx-ml.proto3 ++ ${CMAKE_CURRENT_BINARY_DIR}/onnx/onnx-operators-ml.proto ++ ${CMAKE_CURRENT_BINARY_DIR}/onnx/onnx-operators-ml.proto3 ++ DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/onnx ++) ++# install python files ++if(BUILD_ONNX_PYTHON) ++ install(FILES ${CMAKE_CURRENT_BINARY_DIR}/onnx/onnx_data_pb.py ++ ${CMAKE_CURRENT_BINARY_DIR}/onnx/onnx_data_pb2.py ++ 
${CMAKE_CURRENT_BINARY_DIR}/onnx/onnx_ml_pb2.py ++ ${CMAKE_CURRENT_BINARY_DIR}/onnx/onnx_operators_ml_pb2.py ++ ${CMAKE_CURRENT_BINARY_DIR}/onnx/onnx_operators_pb.py ++ ${CMAKE_CURRENT_BINARY_DIR}/onnx/onnx_pb.py ++ DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/onnx ++ ) ++endif() ++ + set(ONNX_ROOT ${PROJECT_SOURCE_DIR}) + + # Read ONNX version +@@ -116,7 +137,8 @@ endif() + # find_package Python has replaced PythonInterp and PythonLibs since cmake 3.12 + # Use the following command in the future; now this is only compatible with the latest pybind11 + # find_package(Python ${PY_VERSION} COMPONENTS Interpreter Development REQUIRED) +-find_package(PythonInterp ${PY_VERSION} REQUIRED) ++find_package(Python3 ${PY_VERSION} COMPONENTS Interpreter REQUIRED) ++set(PYTHON_EXECUTABLE ${Python3_EXECUTABLE}) + if(BUILD_ONNX_PYTHON) + find_package(PythonLibs ${PY_VERSION}) + endif() +@@ -434,6 +456,7 @@ target_link_libraries(onnx PUBLIC onnx_proto) + add_onnx_global_defines(onnx) + + if(BUILD_ONNX_PYTHON) ++ find_package(Python3 ${PY_VERSION} COMPONENTS Development REQUIRED) + if("${PY_EXT_SUFFIX}" STREQUAL "") + if(MSVC) + set(PY_EXT_SUFFIX ".pyd") +@@ -452,10 +475,14 @@ if(BUILD_ONNX_PYTHON) + target_include_directories(onnx_cpp2py_export PRIVATE + $ + $ +- $) ++ ${Python3_INCLUDE_DIRS}) ++ target_link_directories(onnx_cpp2py_export PRIVATE ++ ${Python3_LIBRARY_DIRS}) ++ target_link_libraries(onnx_cpp2py_export PRIVATE ++ ${Python3_LIBRARIES}) + + # pybind11 is a header only lib +- find_package(pybind11 2.2 CONFIG) ++ find_package(pybind11 2.2 CONFIG REQUIRED) + if(NOT pybind11_FOUND) + if(EXISTS "${ONNX_ROOT}/third_party/pybind11/include/pybind11/pybind11.h") + add_subdirectory("${ONNX_ROOT}/third_party/pybind11") diff --git a/cmake/vcpkg-ports/onnx/fix-dependency-protobuf.patch b/cmake/vcpkg-ports/onnx/fix-dependency-protobuf.patch new file mode 100644 index 0000000000000..c435922d0103d --- /dev/null +++ b/cmake/vcpkg-ports/onnx/fix-dependency-protobuf.patch @@ -0,0 +1,28 @@ +diff --git a/CMakeLists.txt b/CMakeLists.txt +index d81ac1d..9f97998 100644 +--- a/CMakeLists.txt ++++ b/CMakeLists.txt +@@ -149,6 +149,7 @@ if(ONNX_BUILD_TESTS) + set(googletest_STATIC_LIBRARIES GTest::gtest) + endif() + ++find_package(protobuf CONFIG REQUIRED) + if((ONNX_USE_LITE_PROTO AND TARGET protobuf::libprotobuf-lite) OR ((NOT ONNX_USE_LITE_PROTO) AND TARGET protobuf::libprotobuf)) + # Sometimes we need to use protoc compiled for host architecture while linking + # libprotobuf against target architecture. 
See https://github.com/caffe2/caffe +diff --git a/cmake/ONNXConfig.cmake.in b/cmake/ONNXConfig.cmake.in +index d588f8a..dbd4398 100644 +--- a/cmake/ONNXConfig.cmake.in ++++ b/cmake/ONNXConfig.cmake.in +@@ -6,9 +6,8 @@ + # library version information + set(ONNX_VERSION "@ONNX_VERSION@") + +-list(APPEND CMAKE_PREFIX_PATH "@PROTOBUF_DIR@") +-set(Protobuf_INCLUDE_DIR "@PROTOBUF_INCLUDE_DIR@") +-find_package(Protobuf REQUIRED) ++include(CMakeFindDependencyMacro) ++find_dependency(protobuf CONFIG) + + # import targets + include ("${CMAKE_CURRENT_LIST_DIR}/ONNXTargets.cmake") diff --git a/cmake/vcpkg-ports/onnx/portfile.cmake b/cmake/vcpkg-ports/onnx/portfile.cmake new file mode 100644 index 0000000000000..a0c997803870d --- /dev/null +++ b/cmake/vcpkg-ports/onnx/portfile.cmake @@ -0,0 +1,83 @@ +vcpkg_check_linkage(ONLY_STATIC_LIBRARY) + +vcpkg_from_github( + OUT_SOURCE_PATH SOURCE_PATH + REPO onnx/onnx + REF "v${VERSION}" + SHA512 5a18e2b19ec9c18c8b115fb7e12ed98eddaa581c95f15c4dd420cd6c86e7caa04f9a393da589e76b89cf9b3544abd3749a8c77c2446782f37502eb74e9b1f661 + PATCHES + fix-cmakelists.patch + fix-dependency-protobuf.patch +) + +string(COMPARE EQUAL "${VCPKG_CRT_LINKAGE}" "static" USE_STATIC_RUNTIME) + +# ONNX_USE_PROTOBUF_SHARED_LIBS: find the library and check its file extension +find_library(PROTOBUF_LIBPATH NAMES protobuf PATHS "${CURRENT_INSTALLED_DIR}/bin" "${CURRENT_INSTALLED_DIR}/lib" REQUIRED) +get_filename_component(PROTOBUF_LIBNAME "${PROTOBUF_LIBPATH}" NAME) + +set(USE_PROTOBUF_SHARED OFF) + + + +# Like protoc, python is required for codegen. +vcpkg_find_acquire_program(PYTHON3) + +# PATH for .bat scripts so it can find 'python' +get_filename_component(PYTHON_DIR "${PYTHON3}" PATH) +vcpkg_add_to_path(PREPEND "${PYTHON_DIR}") + +vcpkg_cmake_configure( + SOURCE_PATH "${SOURCE_PATH}" + OPTIONS + ${FEATURE_OPTIONS} + -DPython3_EXECUTABLE=${PYTHON3} + -DONNX_ML=ON + -DONNX_GEN_PB_TYPE_STUBS=ON + -DONNX_USE_PROTOBUF_SHARED_LIBS=${USE_PROTOBUF_SHARED} + -DONNX_USE_LITE_PROTO=OFF + -DONNX_USE_MSVC_STATIC_RUNTIME=${USE_STATIC_RUNTIME} + -DONNX_BUILD_TESTS=OFF + -DONNX_BUILD_BENCHMARKS=OFF + -DONNX_DISABLE_STATIC_REGISTRATION=ON + MAYBE_UNUSED_VARIABLES + ONNX_USE_MSVC_STATIC_RUNTIME +) + +vcpkg_cmake_install() +vcpkg_cmake_config_fixup(CONFIG_PATH lib/cmake/ONNX) + +vcpkg_install_copyright(FILE_LIST "${SOURCE_PATH}/LICENSE") + +file(REMOVE_RECURSE + "${CURRENT_PACKAGES_DIR}/debug/include" + "${CURRENT_PACKAGES_DIR}/debug/share" + # the others are empty + "${CURRENT_PACKAGES_DIR}/include/onnx/backend" + "${CURRENT_PACKAGES_DIR}/include/onnx/bin" + "${CURRENT_PACKAGES_DIR}/include/onnx/defs/controlflow" + "${CURRENT_PACKAGES_DIR}/include/onnx/defs/generator" + "${CURRENT_PACKAGES_DIR}/include/onnx/defs/image" + "${CURRENT_PACKAGES_DIR}/include/onnx/defs/logical" + "${CURRENT_PACKAGES_DIR}/include/onnx/defs/math" + "${CURRENT_PACKAGES_DIR}/include/onnx/defs/nn" + "${CURRENT_PACKAGES_DIR}/include/onnx/defs/object_detection" + "${CURRENT_PACKAGES_DIR}/include/onnx/defs/optional" + "${CURRENT_PACKAGES_DIR}/include/onnx/defs/quantization" + "${CURRENT_PACKAGES_DIR}/include/onnx/defs/reduction" + "${CURRENT_PACKAGES_DIR}/include/onnx/defs/rnn" + "${CURRENT_PACKAGES_DIR}/include/onnx/defs/sequence" + "${CURRENT_PACKAGES_DIR}/include/onnx/defs/text" + "${CURRENT_PACKAGES_DIR}/include/onnx/defs/traditionalml" + "${CURRENT_PACKAGES_DIR}/include/onnx/defs/training" + "${CURRENT_PACKAGES_DIR}/include/onnx/examples" + "${CURRENT_PACKAGES_DIR}/include/onnx/frontend" + 
"${CURRENT_PACKAGES_DIR}/include/onnx/onnx_cpp2py_export" + "${CURRENT_PACKAGES_DIR}/include/onnx/test" + "${CURRENT_PACKAGES_DIR}/include/onnx/tools" + "${CURRENT_PACKAGES_DIR}/include/onnx/onnx_ml" + "${CURRENT_PACKAGES_DIR}/include/onnx/onnx_data" + "${CURRENT_PACKAGES_DIR}/include/onnx/onnx_operators_ml" + "${CURRENT_PACKAGES_DIR}/include/onnx/reference/ops" + "${CURRENT_PACKAGES_DIR}/include/onnx/reference" +) diff --git a/cmake/vcpkg-ports/onnx/vcpkg.json b/cmake/vcpkg-ports/onnx/vcpkg.json new file mode 100644 index 0000000000000..7d2bbd84c05b3 --- /dev/null +++ b/cmake/vcpkg-ports/onnx/vcpkg.json @@ -0,0 +1,23 @@ +{ + "name": "onnx", + "version-semver": "1.17.0", + "description": "Open standard for machine learning interoperability", + "homepage": "https://onnx.ai", + "license": "Apache-2.0", + "supports": "!uwp", + "dependencies": [ + "protobuf", + { + "name": "protobuf", + "host": true + }, + { + "name": "vcpkg-cmake", + "host": true + }, + { + "name": "vcpkg-cmake-config", + "host": true + } + ] +} diff --git a/cmake/vcpkg-ports/pthreadpool/fix-cmakelists.patch b/cmake/vcpkg-ports/pthreadpool/fix-cmakelists.patch new file mode 100644 index 0000000000000..97fd1ac7a2bb1 --- /dev/null +++ b/cmake/vcpkg-ports/pthreadpool/fix-cmakelists.patch @@ -0,0 +1,82 @@ +diff --git a/CMakeLists.txt b/CMakeLists.txt +index f06aada..3c6c6e2 100644 +--- a/CMakeLists.txt ++++ b/CMakeLists.txt +@@ -31,8 +31,6 @@ IF(CCACHE_BINARY) + ENDIF() + + # ---[ Options. +-SET(PTHREADPOOL_LIBRARY_TYPE "default" CACHE STRING "Type of library (shared, static, or default) to build") +-SET_PROPERTY(CACHE PTHREADPOOL_LIBRARY_TYPE PROPERTY STRINGS default static shared) + OPTION(PTHREADPOOL_ALLOW_DEPRECATED_API "Enable deprecated API functions" ON) + SET(PTHREADPOOL_SYNC_PRIMITIVE "default" CACHE STRING "Synchronization primitive (condvar, futex, gcd, event, or default) for worker threads") + SET_PROPERTY(CACHE PTHREADPOOL_SYNC_PRIMITIVE PROPERTY STRINGS default condvar futex gcd event) +@@ -41,7 +39,7 @@ IF(CMAKE_SYSTEM_PROCESSOR MATCHES "^(i[3-6]86|AMD64|x86(_64)?)$") + ELSE() + OPTION(PTHREADPOOL_ENABLE_FASTPATH "Enable fast path using atomic decrement instead of atomic compare-and-swap" OFF) + ENDIF() +-IF("${CMAKE_SOURCE_DIR}" STREQUAL "${PROJECT_SOURCE_DIR}") ++IF(FALSE) + OPTION(PTHREADPOOL_BUILD_TESTS "Build pthreadpool unit tests" ON) + OPTION(PTHREADPOOL_BUILD_BENCHMARKS "Build pthreadpool micro-benchmarks" ON) + ELSE() +@@ -67,7 +65,8 @@ MACRO(PTHREADPOOL_TARGET_ENABLE_CXX11 target) + ENDMACRO() + + # ---[ Download deps +-IF(NOT DEFINED FXDIV_SOURCE_DIR) ++find_path(FXDIV_INCLUDE_DIRS "fxdiv.h") ++IF(FALSE) + MESSAGE(STATUS "Downloading FXdiv to ${CMAKE_BINARY_DIR}/FXdiv-source (define FXDIV_SOURCE_DIR to avoid it)") + CONFIGURE_FILE(cmake/DownloadFXdiv.cmake "${CMAKE_BINARY_DIR}/FXdiv-download/CMakeLists.txt") + EXECUTE_PROCESS(COMMAND "${CMAKE_COMMAND}" -G "${CMAKE_GENERATOR}" . 
+@@ -118,21 +117,13 @@ ELSE() + ENDIF() + + ADD_LIBRARY(pthreadpool_interface INTERFACE) +-TARGET_INCLUDE_DIRECTORIES(pthreadpool_interface INTERFACE include) ++TARGET_INCLUDE_DIRECTORIES(pthreadpool_interface INTERFACE $ $) + IF(NOT PTHREADPOOL_ALLOW_DEPRECATED_API) + TARGET_COMPILE_DEFINITIONS(pthreadpool_interface INTERFACE PTHREADPOOL_NO_DEPRECATED_API=1) + ENDIF() + INSTALL(FILES include/pthreadpool.h DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) + +-IF(PTHREADPOOL_LIBRARY_TYPE STREQUAL "default") +- ADD_LIBRARY(pthreadpool ${PTHREADPOOL_SRCS}) +-ELSEIF(PTHREADPOOL_LIBRARY_TYPE STREQUAL "shared") +- ADD_LIBRARY(pthreadpool SHARED ${PTHREADPOOL_SRCS}) +-ELSEIF(PTHREADPOOL_LIBRARY_TYPE STREQUAL "static") +- ADD_LIBRARY(pthreadpool STATIC ${PTHREADPOOL_SRCS}) +-ELSE() +- MESSAGE(FATAL_ERROR "Unsupported library type ${PTHREADPOOL_LIBRARY_TYPE}") +-ENDIF() ++ADD_LIBRARY(pthreadpool ${PTHREADPOOL_SRCS}) + + IF(PTHREADPOOL_SYNC_PRIMITIVE STREQUAL "condvar") + TARGET_COMPILE_DEFINITIONS(pthreadpool PRIVATE PTHREADPOOL_USE_FUTEX=0) +@@ -181,18 +172,22 @@ IF(CMAKE_SYSTEM_NAME STREQUAL "Linux") + ENDIF() + + # ---[ Configure FXdiv +-IF(NOT TARGET fxdiv) ++IF(FALSE) + SET(FXDIV_BUILD_TESTS OFF CACHE BOOL "") + SET(FXDIV_BUILD_BENCHMARKS OFF CACHE BOOL "") + ADD_SUBDIRECTORY( + "${FXDIV_SOURCE_DIR}" + "${CMAKE_BINARY_DIR}/FXdiv") + ENDIF() +-TARGET_LINK_LIBRARIES(pthreadpool PRIVATE fxdiv) ++TARGET_INCLUDE_DIRECTORIES(pthreadpool PRIVATE ${FXDIV_INCLUDE_DIRS}) + +-INSTALL(TARGETS pthreadpool ++INSTALL(TARGETS pthreadpool pthreadpool_interface ++ EXPORT unofficial-pthreadpool-config ++ RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}) ++install(EXPORT unofficial-pthreadpool-config NAMESPACE unofficial:: ++ DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/unofficial-${PROJECT_NAME}) # share/unofficial-pthreadpool + + IF(PTHREADPOOL_BUILD_TESTS) + # ---[ Build google test diff --git a/cmake/vcpkg-ports/pthreadpool/portfile.cmake b/cmake/vcpkg-ports/pthreadpool/portfile.cmake new file mode 100644 index 0000000000000..9400e5e886639 --- /dev/null +++ b/cmake/vcpkg-ports/pthreadpool/portfile.cmake @@ -0,0 +1,25 @@ +if(VCPKG_TARGET_IS_WINDOWS) + vcpkg_check_linkage(ONLY_STATIC_LIBRARY) +endif() + +vcpkg_from_github( + OUT_SOURCE_PATH SOURCE_PATH + REPO google/pthreadpool + REF 4e80ca24521aa0fb3a746f9ea9c3eaa20e9afbb0 + SHA512 776017cc5d2aa94337292f2f4fbd54d099ef29abf736ab8147f07f98f12b7654cbd2fe38d34646a479a519c261ac253bbaf19c6dcbb0ec4cc0859de70f7e6472 + PATCHES + fix-cmakelists.patch +) + +vcpkg_cmake_configure( + SOURCE_PATH "${SOURCE_PATH}" + OPTIONS + -DPTHREADPOOL_BUILD_TESTS=OFF + -DPTHREADPOOL_BUILD_BENCHMARKS=OFF +) +vcpkg_cmake_install() +vcpkg_copy_pdbs() +vcpkg_cmake_config_fixup(PACKAGE_NAME unofficial-${PORT}) + +#file(INSTALL "${SOURCE_PATH}/LICENSE" DESTINATION "${CURRENT_PACKAGES_DIR}/share/${PORT}" RENAME copyright) +file(REMOVE_RECURSE "${CURRENT_PACKAGES_DIR}/debug/include") diff --git a/cmake/vcpkg-ports/pthreadpool/vcpkg.json b/cmake/vcpkg-ports/pthreadpool/vcpkg.json new file mode 100644 index 0000000000000..16c0bea5b712c --- /dev/null +++ b/cmake/vcpkg-ports/pthreadpool/vcpkg.json @@ -0,0 +1,17 @@ +{ + "name": "pthreadpool", + "version-date": "2024-12-17", + "description": "Portable (POSIX/Windows/Emscripten) thread pool for C/C++", + "homepage": "https://github.com/google/pthreadpool", + "dependencies": [ + "fxdiv", + { + "name": "vcpkg-cmake", + "host": true + }, + { + "name": "vcpkg-cmake-config", + 
"host": true + } + ] +} diff --git a/cmake/vcpkg-ports/xnnpack/disable_gcc_warning.patch b/cmake/vcpkg-ports/xnnpack/disable_gcc_warning.patch new file mode 100644 index 0000000000000..a7c5e0e254aa1 --- /dev/null +++ b/cmake/vcpkg-ports/xnnpack/disable_gcc_warning.patch @@ -0,0 +1,12 @@ +diff --git a/CMakeLists.txt b/CMakeLists.txt +index 4a9fad59a..2713cded3 100644 +--- a/CMakeLists.txt ++++ b/CMakeLists.txt +@@ -334,6 +334,7 @@ ENDIF() + IF(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") + # Disable "note: parameter passing for argument of type ... changed/will change in ..." + ADD_COMPILE_OPTIONS("-Wno-psabi") ++ ADD_COMPILE_OPTIONS("-Wno-incompatible-pointer-types") + ENDIF() + + # ---[ Build flags diff --git a/cmake/vcpkg-ports/xnnpack/fix-build.patch b/cmake/vcpkg-ports/xnnpack/fix-build.patch new file mode 100644 index 0000000000000..b867377d2ff9e --- /dev/null +++ b/cmake/vcpkg-ports/xnnpack/fix-build.patch @@ -0,0 +1,71 @@ +diff --git a/CMakeLists.txt b/CMakeLists.txt +index f0b3410ae..ba54c3bfe 100644 +--- a/CMakeLists.txt ++++ b/CMakeLists.txt +@@ -1047,9 +1047,11 @@ ENDIF() + IF(XNNPACK_BUILD_ALL_MICROKERNELS) + TARGET_INCLUDE_DIRECTORIES(microkernels-all PRIVATE include src) + ENDIF() ++ + TARGET_INCLUDE_DIRECTORIES(datatype PRIVATE include src) + TARGET_INCLUDE_DIRECTORIES(microkernels-prod PRIVATE include src) +-TARGET_INCLUDE_DIRECTORIES(hardware-config PRIVATE include src ${CPUINFO_SOURCE_DIR}/include) ++TARGET_INCLUDE_DIRECTORIES(hardware-config PRIVATE include src) ++ + TARGET_INCLUDE_DIRECTORIES(indirection PRIVATE include src) + TARGET_INCLUDE_DIRECTORIES(microparams-init PRIVATE include src) + TARGET_INCLUDE_DIRECTORIES(normalization PRIVATE include src) +@@ -1104,14 +1106,9 @@ IF(NOT TARGET cpuinfo) + "${CPUINFO_SOURCE_DIR}" + "${CMAKE_BINARY_DIR}/cpuinfo") + ELSE() +- ADD_LIBRARY(cpuinfo SHARED IMPORTED) +- FIND_LIBRARY(CPUINFO_LIBRARY cpuinfo PATHS "${CPUINFO_SOURCE_DIR}/lib") +- IF(NOT CPUINFO_LIBRARY) +- MESSAGE(FATAL_ERROR "Cannot find cpuinfo") +- ENDIF() +- TARGET_INCLUDE_DIRECTORIES(cpuinfo INTERFACE "${CPUINFO_SOURCE_DIR}/include") +- SET_PROPERTY(TARGET cpuinfo PROPERTY IMPORTED_LOCATION "${CPUINFO_LIBRARY}") +- SET_PROPERTY(TARGET cpuinfo PROPERTY IMPORTED_IMPLIB "${CPUINFO_LIBRARY}") ++ ADD_LIBRARY(cpuinfo INTERFACE) ++ FIND_PACKAGE(cpuinfo CONFIG REQUIRED) ++ TARGET_LINK_LIBRARIES(cpuinfo INTERFACE cpuinfo::cpuinfo) + ENDIF() + ENDIF() + IF(XNNPACK_BUILD_LIBRARY) +@@ -1129,16 +1126,12 @@ IF(NOT TARGET pthreadpool) + "${PTHREADPOOL_SOURCE_DIR}" + "${CMAKE_BINARY_DIR}/pthreadpool") + ELSE() ++ find_package(unofficial-pthreadpool CONFIG REQUIRED) + ADD_LIBRARY(pthreadpool SHARED IMPORTED) +- FIND_LIBRARY(PTHREADPOOL_LIBRARY pthreadpool PATHS "${PTHREADPOOL_SOURCE_DIR}/lib") +- IF(NOT PTHREADPOOL_LIBRARY) +- MESSAGE(FATAL_ERROR "Cannot find pthreadpool") +- ENDIF() ++ FIND_LIBRARY(PTHREADPOOL_LIBRARY NAMES pthreadpool REQUIRED) + FIND_PACKAGE(Threads REQUIRED) +- TARGET_INCLUDE_DIRECTORIES(pthreadpool INTERFACE "${PTHREADPOOL_SOURCE_DIR}/include") +- TARGET_LINK_LIBRARIES(pthreadpool INTERFACE Threads::Threads) ++ TARGET_LINK_LIBRARIES(pthreadpool INTERFACE Threads::Threads unofficial::pthreadpool unofficial::pthreadpool_interface) + SET_PROPERTY(TARGET pthreadpool PROPERTY IMPORTED_LOCATION "${PTHREADPOOL_LIBRARY}") +- SET_PROPERTY(TARGET pthreadpool PROPERTY IMPORTED_IMPLIB "${PTHREADPOOL_LIBRARY}") + ENDIF() + ENDIF() + TARGET_LINK_LIBRARIES(xnnpack-base INTERFACE pthreadpool) +@@ -1152,12 +1145,12 @@ IF(NOT TARGET fxdiv) + "${FXDIV_SOURCE_DIR}" + 
"${CMAKE_BINARY_DIR}/FXdiv") + ELSE() +- FIND_FILE(FXDIV_HDR fxdiv.h PATH_SUFFIXES include PATHS "${FXDIV_SOURCE_DIR}") ++ FIND_PATH(FXDIV_HDR fxdiv.h PATH_SUFFIXES include) + IF(NOT FXDIV_HDR) + MESSAGE(FATAL_ERROR "Cannot find fxdiv") + ENDIF() +- ADD_LIBRARY(fxdiv STATIC "${FXDIV_HDR}") +- TARGET_INCLUDE_DIRECTORIES(fxdiv INTERFACE "${FXDIV_SOURCE_DIR}/include") ++ ADD_LIBRARY(fxdiv INTERFACE IMPORTED) ++ target_include_directories(fxdiv INTERFACE "${FXDIV_HDR}") + SET_PROPERTY(TARGET fxdiv PROPERTY LINKER_LANGUAGE C) + ENDIF() + ENDIF() diff --git a/cmake/vcpkg-ports/xnnpack/portfile.cmake b/cmake/vcpkg-ports/xnnpack/portfile.cmake new file mode 100644 index 0000000000000..b07da3186b4b4 --- /dev/null +++ b/cmake/vcpkg-ports/xnnpack/portfile.cmake @@ -0,0 +1,39 @@ +if(VCPKG_TARGET_IS_WINDOWS) + vcpkg_check_linkage(ONLY_STATIC_LIBRARY) +endif() + +vcpkg_from_github( + OUT_SOURCE_PATH SOURCE_PATH + REPO google/XNNPACK + REF 854b343f9cad36bd596e4390959ca3648208e048 + SHA512 f37384b43022cb74bf87bd99c2e82e51d48fe4e0e4642611fcbc10cbb86ff2468b67964027f13f82a715dc7201c490d88d5020fb565ad236187b9dd219f3f644 + HEAD_REF master + PATCHES + fix-build.patch + disable_gcc_warning.patch +) +vcpkg_find_acquire_program(PYTHON3) + +vcpkg_cmake_configure( + SOURCE_PATH "${SOURCE_PATH}" + WINDOWS_USE_MSBUILD + OPTIONS + "-DPython3_EXECUTABLE=${PYTHON3}" + "-DPython_EXECUTABLE=${PYTHON3}" + -DXNNPACK_USE_SYSTEM_LIBS=ON + -DXNNPACK_ENABLE_AVXVNNI=OFF + -DXNNPACK_ENABLE_ASSEMBLY=ON + -DXNNPACK_ENABLE_MEMOPT=ON + -DXNNPACK_ENABLE_SPARSE=ON + -DXNNPACK_ENABLE_KLEIDIAI=OFF + -DXNNPACK_BUILD_TESTS=OFF + -DXNNPACK_BUILD_BENCHMARKS=OFF +) +vcpkg_cmake_install() +vcpkg_copy_pdbs() + +file(INSTALL "${SOURCE_PATH}/LICENSE" DESTINATION "${CURRENT_PACKAGES_DIR}/share/${PORT}" RENAME copyright) +file(REMOVE_RECURSE "${CURRENT_PACKAGES_DIR}/debug/include" + "${CURRENT_PACKAGES_DIR}/debug/bin" + "${CURRENT_PACKAGES_DIR}/debug/share" +) diff --git a/cmake/vcpkg-ports/xnnpack/vcpkg.json b/cmake/vcpkg-ports/xnnpack/vcpkg.json new file mode 100644 index 0000000000000..5e383c0b37810 --- /dev/null +++ b/cmake/vcpkg-ports/xnnpack/vcpkg.json @@ -0,0 +1,17 @@ +{ + "name": "xnnpack", + "version-date": "2025-01-17", + "description": "High-efficiency floating-point neural network inference operators for mobile, server, and Web", + "homepage": "https://github.com/google/XNNPACK", + "license": "BSD-3-Clause", + "supports": "!(arm & windows) & !uwp & !arm32", + "dependencies": [ + "cpuinfo", + "fxdiv", + "pthreadpool", + { + "name": "vcpkg-cmake", + "host": true + } + ] +} diff --git a/cmake/vcpkg-triplets/asan/arm64-linux.cmake b/cmake/vcpkg-triplets/asan/arm64-linux.cmake index 6875a03064bfa..9f5c9997daedb 100644 --- a/cmake/vcpkg-triplets/asan/arm64-linux.cmake +++ b/cmake/vcpkg-triplets/asan/arm64-linux.cmake @@ -3,12 +3,15 @@ set(VCPKG_TARGET_ARCHITECTURE arm64) set(VCPKG_CRT_LINKAGE dynamic) set(VCPKG_LIBRARY_LINKAGE static) -set(VCPKG_C_FLAGS "-fsanitize=address") -set(VCPKG_CXX_FLAGS "-fsanitize=address") +set(VCPKG_C_FLAGS "-g -fsanitize=address") +set(VCPKG_CXX_FLAGS "-g -fsanitize=address") +set(VCPKG_C_FLAGS_RELEASE "-DNDEBUG -O3") +set(VCPKG_CXX_FLAGS_RELEASE "-DNDEBUG -O3") set(VCPKG_CMAKE_SYSTEM_NAME Linux) set(CMAKE_POSITION_INDEPENDENT_CODE ON) list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error -DBENCHMARK_ENABLE_WERROR=OFF) -set(VCPKG_LINKER_FLAGS "-fsanitize=address") +set(VCPKG_LINKER_FLAGS "-fsanitize=address -g") +list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS -DCMAKE_CXX_STANDARD=17) if(PORT MATCHES 
"onnx") list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS "-DONNX_DISABLE_STATIC_REGISTRATION=ON" diff --git a/cmake/vcpkg-triplets/asan/arm64-osx.cmake b/cmake/vcpkg-triplets/asan/arm64-osx.cmake index 4ac6bd8097de6..ba56684949836 100644 --- a/cmake/vcpkg-triplets/asan/arm64-osx.cmake +++ b/cmake/vcpkg-triplets/asan/arm64-osx.cmake @@ -3,13 +3,16 @@ set(VCPKG_TARGET_ARCHITECTURE arm64) set(VCPKG_CRT_LINKAGE dynamic) set(VCPKG_LIBRARY_LINKAGE static) -set(VCPKG_C_FLAGS "-fsanitize=address") -set(VCPKG_CXX_FLAGS "-fsanitize=address") +set(VCPKG_C_FLAGS "-g -fsanitize=address") +set(VCPKG_CXX_FLAGS "-g -fsanitize=address") +set(VCPKG_C_FLAGS_RELEASE "-DNDEBUG -O3") +set(VCPKG_CXX_FLAGS_RELEASE "-DNDEBUG -O3") set(VCPKG_CMAKE_SYSTEM_NAME Darwin) set(VCPKG_OSX_ARCHITECTURES "arm64") set(CMAKE_POSITION_INDEPENDENT_CODE ON) list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error -DBENCHMARK_ENABLE_WERROR=OFF) -set(VCPKG_LINKER_FLAGS "-fsanitize=address") +set(VCPKG_LINKER_FLAGS "-fsanitize=address -g") +list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS -DCMAKE_CXX_STANDARD=20) if(PORT MATCHES "onnx") list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS "-DONNX_DISABLE_STATIC_REGISTRATION=ON" diff --git a/cmake/vcpkg-triplets/asan/arm64-windows-static-md.cmake b/cmake/vcpkg-triplets/asan/arm64-windows-static-md.cmake index c03c9e718fc80..79e10ad9e4436 100644 --- a/cmake/vcpkg-triplets/asan/arm64-windows-static-md.cmake +++ b/cmake/vcpkg-triplets/asan/arm64-windows-static-md.cmake @@ -5,7 +5,7 @@ set(VCPKG_CRT_LINKAGE dynamic) set(VCPKG_LIBRARY_LINKAGE static) set(VCPKG_C_FLAGS "/MP /DWIN32 /D_WINDOWS /DWINAPI_FAMILY=100 /DWINVER=0x0A00 /D_WIN32_WINNT=0x0A00 /DNTDDI_VERSION=0x0A000000 /fsanitize=address") set(VCPKG_CXX_FLAGS "/MP /DWIN32 /D_WINDOWS /DWINAPI_FAMILY=100 /DWINVER=0x0A00 /D_WIN32_WINNT=0x0A00 /DNTDDI_VERSION=0x0A000000 /fsanitize=address /Zc:__cplusplus") -list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error) +list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error -DCMAKE_CXX_STANDARD=17) if(PORT MATCHES "onnx") list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS "-DONNX_DISABLE_STATIC_REGISTRATION=ON" diff --git a/cmake/vcpkg-triplets/asan/arm64-windows-static.cmake b/cmake/vcpkg-triplets/asan/arm64-windows-static.cmake index 184001d4238b0..d0a3305b1f74a 100644 --- a/cmake/vcpkg-triplets/asan/arm64-windows-static.cmake +++ b/cmake/vcpkg-triplets/asan/arm64-windows-static.cmake @@ -5,7 +5,7 @@ set(VCPKG_CRT_LINKAGE static) set(VCPKG_LIBRARY_LINKAGE static) set(VCPKG_C_FLAGS "/MP /DWIN32 /D_WINDOWS /DWINAPI_FAMILY=100 /DWINVER=0x0A00 /D_WIN32_WINNT=0x0A00 /DNTDDI_VERSION=0x0A000000 /fsanitize=address") set(VCPKG_CXX_FLAGS "/MP /DWIN32 /D_WINDOWS /DWINAPI_FAMILY=100 /DWINVER=0x0A00 /D_WIN32_WINNT=0x0A00 /DNTDDI_VERSION=0x0A000000 /fsanitize=address /Zc:__cplusplus") -list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error) +list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error -DCMAKE_CXX_STANDARD=17) if(PORT MATCHES "onnx") list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS "-DONNX_DISABLE_STATIC_REGISTRATION=ON" diff --git a/cmake/vcpkg-triplets/asan/arm64ec-windows-static-md.cmake b/cmake/vcpkg-triplets/asan/arm64ec-windows-static-md.cmake index 36176fe04033e..05a9718835ffb 100644 --- a/cmake/vcpkg-triplets/asan/arm64ec-windows-static-md.cmake +++ b/cmake/vcpkg-triplets/asan/arm64ec-windows-static-md.cmake @@ -5,7 +5,7 @@ set(VCPKG_CRT_LINKAGE dynamic) set(VCPKG_LIBRARY_LINKAGE static) set(VCPKG_C_FLAGS "/MP /DWIN32 /D_WINDOWS 
/DWINAPI_FAMILY=100 /DWINVER=0x0A00 /D_WIN32_WINNT=0x0A00 /DNTDDI_VERSION=0x0A000000 /fsanitize=address") set(VCPKG_CXX_FLAGS "/MP /DWIN32 /D_WINDOWS /DWINAPI_FAMILY=100 /DWINVER=0x0A00 /D_WIN32_WINNT=0x0A00 /DNTDDI_VERSION=0x0A000000 /fsanitize=address /Zc:__cplusplus") -list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error) +list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error -DCMAKE_CXX_STANDARD=17) if(PORT MATCHES "onnx") list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS "-DONNX_DISABLE_STATIC_REGISTRATION=ON" diff --git a/cmake/vcpkg-triplets/asan/arm64ec-windows-static.cmake b/cmake/vcpkg-triplets/asan/arm64ec-windows-static.cmake index aa086c1220dfb..e0f4b2e1e4183 100644 --- a/cmake/vcpkg-triplets/asan/arm64ec-windows-static.cmake +++ b/cmake/vcpkg-triplets/asan/arm64ec-windows-static.cmake @@ -5,7 +5,7 @@ set(VCPKG_CRT_LINKAGE static) set(VCPKG_LIBRARY_LINKAGE static) set(VCPKG_C_FLAGS "/MP /DWIN32 /D_WINDOWS /DWINAPI_FAMILY=100 /DWINVER=0x0A00 /D_WIN32_WINNT=0x0A00 /DNTDDI_VERSION=0x0A000000 /fsanitize=address") set(VCPKG_CXX_FLAGS "/MP /DWIN32 /D_WINDOWS /DWINAPI_FAMILY=100 /DWINVER=0x0A00 /D_WIN32_WINNT=0x0A00 /DNTDDI_VERSION=0x0A000000 /fsanitize=address /Zc:__cplusplus") -list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error) +list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error -DCMAKE_CXX_STANDARD=17) if(PORT MATCHES "onnx") list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS "-DONNX_DISABLE_STATIC_REGISTRATION=ON" diff --git a/cmake/vcpkg-triplets/asan/universal2-osx.cmake b/cmake/vcpkg-triplets/asan/universal2-osx.cmake index de2c8cee48ed5..d74494d578cd9 100644 --- a/cmake/vcpkg-triplets/asan/universal2-osx.cmake +++ b/cmake/vcpkg-triplets/asan/universal2-osx.cmake @@ -3,13 +3,16 @@ set(VCPKG_TARGET_ARCHITECTURE x64) set(VCPKG_CRT_LINKAGE dynamic) set(VCPKG_LIBRARY_LINKAGE static) -set(VCPKG_C_FLAGS "-fsanitize=address") -set(VCPKG_CXX_FLAGS "-fsanitize=address") +set(VCPKG_C_FLAGS "-g -fsanitize=address") +set(VCPKG_CXX_FLAGS "-g -fsanitize=address") +set(VCPKG_C_FLAGS_RELEASE "-DNDEBUG -O3") +set(VCPKG_CXX_FLAGS_RELEASE "-DNDEBUG -O3") set(VCPKG_CMAKE_SYSTEM_NAME Darwin) set(VCPKG_OSX_ARCHITECTURES "x86_64;arm64") set(CMAKE_POSITION_INDEPENDENT_CODE ON) list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error -DBENCHMARK_ENABLE_WERROR=OFF) -set(VCPKG_LINKER_FLAGS "-fsanitize=address") +set(VCPKG_LINKER_FLAGS "-fsanitize=address -g") +list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS -DCMAKE_CXX_STANDARD=20) if(PORT MATCHES "onnx") list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS "-DONNX_DISABLE_STATIC_REGISTRATION=ON" diff --git a/cmake/vcpkg-triplets/asan/x64-linux.cmake b/cmake/vcpkg-triplets/asan/x64-linux.cmake index dd1d066eb373a..64ba6b2216394 100644 --- a/cmake/vcpkg-triplets/asan/x64-linux.cmake +++ b/cmake/vcpkg-triplets/asan/x64-linux.cmake @@ -3,12 +3,15 @@ set(VCPKG_TARGET_ARCHITECTURE x64) set(VCPKG_CRT_LINKAGE dynamic) set(VCPKG_LIBRARY_LINKAGE static) -set(VCPKG_C_FLAGS "-fsanitize=address") -set(VCPKG_CXX_FLAGS "-fsanitize=address") +set(VCPKG_C_FLAGS "-g -fsanitize=address") +set(VCPKG_CXX_FLAGS "-g -fsanitize=address") +set(VCPKG_C_FLAGS_RELEASE "-DNDEBUG -O3") +set(VCPKG_CXX_FLAGS_RELEASE "-DNDEBUG -O3") set(VCPKG_CMAKE_SYSTEM_NAME Linux) set(CMAKE_POSITION_INDEPENDENT_CODE ON) list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error -DBENCHMARK_ENABLE_WERROR=OFF) -set(VCPKG_LINKER_FLAGS "-fsanitize=address") +set(VCPKG_LINKER_FLAGS "-fsanitize=address -g") +list(APPEND 
VCPKG_CMAKE_CONFIGURE_OPTIONS -DCMAKE_CXX_STANDARD=17) if(PORT MATCHES "onnx") list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS "-DONNX_DISABLE_STATIC_REGISTRATION=ON" diff --git a/cmake/vcpkg-triplets/asan/x64-osx.cmake b/cmake/vcpkg-triplets/asan/x64-osx.cmake index 5f1442c1d5c4e..bbcaff4c39209 100644 --- a/cmake/vcpkg-triplets/asan/x64-osx.cmake +++ b/cmake/vcpkg-triplets/asan/x64-osx.cmake @@ -3,13 +3,16 @@ set(VCPKG_TARGET_ARCHITECTURE x64) set(VCPKG_CRT_LINKAGE dynamic) set(VCPKG_LIBRARY_LINKAGE static) -set(VCPKG_C_FLAGS "-fsanitize=address") -set(VCPKG_CXX_FLAGS "-fsanitize=address") +set(VCPKG_C_FLAGS "-g -fsanitize=address") +set(VCPKG_CXX_FLAGS "-g -fsanitize=address") +set(VCPKG_C_FLAGS_RELEASE "-DNDEBUG -O3") +set(VCPKG_CXX_FLAGS_RELEASE "-DNDEBUG -O3") set(VCPKG_CMAKE_SYSTEM_NAME Darwin) set(VCPKG_OSX_ARCHITECTURES "x86_64") set(CMAKE_POSITION_INDEPENDENT_CODE ON) list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error -DBENCHMARK_ENABLE_WERROR=OFF) -set(VCPKG_LINKER_FLAGS "-fsanitize=address") +set(VCPKG_LINKER_FLAGS "-fsanitize=address -g") +list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS -DCMAKE_CXX_STANDARD=20) if(PORT MATCHES "onnx") list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS "-DONNX_DISABLE_STATIC_REGISTRATION=ON" diff --git a/cmake/vcpkg-triplets/asan/x64-windows-static-md.cmake b/cmake/vcpkg-triplets/asan/x64-windows-static-md.cmake index 27f7a0190a33c..c0edb9ca31cb6 100644 --- a/cmake/vcpkg-triplets/asan/x64-windows-static-md.cmake +++ b/cmake/vcpkg-triplets/asan/x64-windows-static-md.cmake @@ -5,7 +5,7 @@ set(VCPKG_CRT_LINKAGE dynamic) set(VCPKG_LIBRARY_LINKAGE static) set(VCPKG_C_FLAGS "/MP /DWIN32 /D_WINDOWS /DWINAPI_FAMILY=100 /DWINVER=0x0A00 /D_WIN32_WINNT=0x0A00 /DNTDDI_VERSION=0x0A000000 /fsanitize=address") set(VCPKG_CXX_FLAGS "/MP /DWIN32 /D_WINDOWS /DWINAPI_FAMILY=100 /DWINVER=0x0A00 /D_WIN32_WINNT=0x0A00 /DNTDDI_VERSION=0x0A000000 /fsanitize=address /Zc:__cplusplus") -list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error) +list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error -DCMAKE_CXX_STANDARD=17) if(PORT MATCHES "onnx") list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS "-DONNX_DISABLE_STATIC_REGISTRATION=ON" diff --git a/cmake/vcpkg-triplets/asan/x64-windows-static.cmake b/cmake/vcpkg-triplets/asan/x64-windows-static.cmake index 23b8082fbd5a3..3370987c55a12 100644 --- a/cmake/vcpkg-triplets/asan/x64-windows-static.cmake +++ b/cmake/vcpkg-triplets/asan/x64-windows-static.cmake @@ -5,7 +5,7 @@ set(VCPKG_CRT_LINKAGE static) set(VCPKG_LIBRARY_LINKAGE static) set(VCPKG_C_FLAGS "/MP /DWIN32 /D_WINDOWS /DWINAPI_FAMILY=100 /DWINVER=0x0A00 /D_WIN32_WINNT=0x0A00 /DNTDDI_VERSION=0x0A000000 /fsanitize=address") set(VCPKG_CXX_FLAGS "/MP /DWIN32 /D_WINDOWS /DWINAPI_FAMILY=100 /DWINVER=0x0A00 /D_WIN32_WINNT=0x0A00 /DNTDDI_VERSION=0x0A000000 /fsanitize=address /Zc:__cplusplus") -list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error) +list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error -DCMAKE_CXX_STANDARD=17) if(PORT MATCHES "onnx") list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS "-DONNX_DISABLE_STATIC_REGISTRATION=ON" diff --git a/cmake/vcpkg-triplets/asan/x86-windows-static-md.cmake b/cmake/vcpkg-triplets/asan/x86-windows-static-md.cmake index cb9c639049936..429a4ac7cea36 100644 --- a/cmake/vcpkg-triplets/asan/x86-windows-static-md.cmake +++ b/cmake/vcpkg-triplets/asan/x86-windows-static-md.cmake @@ -5,7 +5,7 @@ set(VCPKG_CRT_LINKAGE dynamic) set(VCPKG_LIBRARY_LINKAGE static) set(VCPKG_C_FLAGS "/MP 
/DWIN32 /D_WINDOWS /DWINAPI_FAMILY=100 /DWINVER=0x0A00 /D_WIN32_WINNT=0x0A00 /DNTDDI_VERSION=0x0A000000 /fsanitize=address") set(VCPKG_CXX_FLAGS "/MP /DWIN32 /D_WINDOWS /DWINAPI_FAMILY=100 /DWINVER=0x0A00 /D_WIN32_WINNT=0x0A00 /DNTDDI_VERSION=0x0A000000 /fsanitize=address /Zc:__cplusplus") -list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error) +list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error -DCMAKE_CXX_STANDARD=17) if(PORT MATCHES "onnx") list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS "-DONNX_DISABLE_STATIC_REGISTRATION=ON" diff --git a/cmake/vcpkg-triplets/asan/x86-windows-static.cmake b/cmake/vcpkg-triplets/asan/x86-windows-static.cmake index 0667f5f0ea61e..404cb3fbd07fb 100644 --- a/cmake/vcpkg-triplets/asan/x86-windows-static.cmake +++ b/cmake/vcpkg-triplets/asan/x86-windows-static.cmake @@ -5,7 +5,7 @@ set(VCPKG_CRT_LINKAGE static) set(VCPKG_LIBRARY_LINKAGE static) set(VCPKG_C_FLAGS "/MP /DWIN32 /D_WINDOWS /DWINAPI_FAMILY=100 /DWINVER=0x0A00 /D_WIN32_WINNT=0x0A00 /DNTDDI_VERSION=0x0A000000 /fsanitize=address") set(VCPKG_CXX_FLAGS "/MP /DWIN32 /D_WINDOWS /DWINAPI_FAMILY=100 /DWINVER=0x0A00 /D_WIN32_WINNT=0x0A00 /DNTDDI_VERSION=0x0A000000 /fsanitize=address /Zc:__cplusplus") -list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error) +list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error -DCMAKE_CXX_STANDARD=17) if(PORT MATCHES "onnx") list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS "-DONNX_DISABLE_STATIC_REGISTRATION=ON" diff --git a/cmake/vcpkg-triplets/asan_nortti/arm64-linux.cmake b/cmake/vcpkg-triplets/asan_nortti/arm64-linux.cmake index 77f35ebada258..3d78741ebcf1d 100644 --- a/cmake/vcpkg-triplets/asan_nortti/arm64-linux.cmake +++ b/cmake/vcpkg-triplets/asan_nortti/arm64-linux.cmake @@ -3,12 +3,15 @@ set(VCPKG_TARGET_ARCHITECTURE arm64) set(VCPKG_CRT_LINKAGE dynamic) set(VCPKG_LIBRARY_LINKAGE static) -set(VCPKG_C_FLAGS "-fsanitize=address -DEMSCRIPTEN_HAS_UNBOUND_TYPE_NAMES=0") -set(VCPKG_CXX_FLAGS "-fsanitize=address -DEMSCRIPTEN_HAS_UNBOUND_TYPE_NAMES=0 -fno-rtti") +set(VCPKG_C_FLAGS "-g -fsanitize=address -DEMSCRIPTEN_HAS_UNBOUND_TYPE_NAMES=0") +set(VCPKG_CXX_FLAGS "-g -fsanitize=address -DEMSCRIPTEN_HAS_UNBOUND_TYPE_NAMES=0 -fno-rtti") +set(VCPKG_C_FLAGS_RELEASE "-DNDEBUG -O3") +set(VCPKG_CXX_FLAGS_RELEASE "-DNDEBUG -O3") set(VCPKG_CMAKE_SYSTEM_NAME Linux) set(CMAKE_POSITION_INDEPENDENT_CODE ON) list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error -DBENCHMARK_ENABLE_WERROR=OFF) -set(VCPKG_LINKER_FLAGS "-fsanitize=address") +set(VCPKG_LINKER_FLAGS "-fsanitize=address -g") +list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS -DCMAKE_CXX_STANDARD=17) if(PORT MATCHES "onnx") list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS "-DONNX_DISABLE_STATIC_REGISTRATION=ON" diff --git a/cmake/vcpkg-triplets/asan_nortti/arm64-osx.cmake b/cmake/vcpkg-triplets/asan_nortti/arm64-osx.cmake index 5cc70905e6e24..b25f8f8ebb8d1 100644 --- a/cmake/vcpkg-triplets/asan_nortti/arm64-osx.cmake +++ b/cmake/vcpkg-triplets/asan_nortti/arm64-osx.cmake @@ -3,13 +3,16 @@ set(VCPKG_TARGET_ARCHITECTURE arm64) set(VCPKG_CRT_LINKAGE dynamic) set(VCPKG_LIBRARY_LINKAGE static) -set(VCPKG_C_FLAGS "-fsanitize=address -DEMSCRIPTEN_HAS_UNBOUND_TYPE_NAMES=0") -set(VCPKG_CXX_FLAGS "-fsanitize=address -DEMSCRIPTEN_HAS_UNBOUND_TYPE_NAMES=0 -fno-rtti") +set(VCPKG_C_FLAGS "-g -fsanitize=address -DEMSCRIPTEN_HAS_UNBOUND_TYPE_NAMES=0") +set(VCPKG_CXX_FLAGS "-g -fsanitize=address -DEMSCRIPTEN_HAS_UNBOUND_TYPE_NAMES=0 -fno-rtti") +set(VCPKG_C_FLAGS_RELEASE 
"-DNDEBUG -O3") +set(VCPKG_CXX_FLAGS_RELEASE "-DNDEBUG -O3") set(VCPKG_CMAKE_SYSTEM_NAME Darwin) set(VCPKG_OSX_ARCHITECTURES "arm64") set(CMAKE_POSITION_INDEPENDENT_CODE ON) list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error -DBENCHMARK_ENABLE_WERROR=OFF) -set(VCPKG_LINKER_FLAGS "-fsanitize=address") +set(VCPKG_LINKER_FLAGS "-fsanitize=address -g") +list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS -DCMAKE_CXX_STANDARD=20) if(PORT MATCHES "onnx") list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS "-DONNX_DISABLE_STATIC_REGISTRATION=ON" diff --git a/cmake/vcpkg-triplets/asan_nortti/arm64-windows-static-md.cmake b/cmake/vcpkg-triplets/asan_nortti/arm64-windows-static-md.cmake index cb0957791f432..c4ba82b7cac2a 100644 --- a/cmake/vcpkg-triplets/asan_nortti/arm64-windows-static-md.cmake +++ b/cmake/vcpkg-triplets/asan_nortti/arm64-windows-static-md.cmake @@ -5,7 +5,7 @@ set(VCPKG_CRT_LINKAGE dynamic) set(VCPKG_LIBRARY_LINKAGE static) set(VCPKG_C_FLAGS "/MP /DWIN32 /D_WINDOWS /DWINAPI_FAMILY=100 /DWINVER=0x0A00 /D_WIN32_WINNT=0x0A00 /DNTDDI_VERSION=0x0A000000 /fsanitize=address") set(VCPKG_CXX_FLAGS "/MP /DWIN32 /D_WINDOWS /DWINAPI_FAMILY=100 /DWINVER=0x0A00 /D_WIN32_WINNT=0x0A00 /DNTDDI_VERSION=0x0A000000 /fsanitize=address /Zc:__cplusplus /GR- /we4541") -list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error) +list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error -DCMAKE_CXX_STANDARD=17) if(PORT MATCHES "onnx") list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS "-DONNX_DISABLE_STATIC_REGISTRATION=ON" diff --git a/cmake/vcpkg-triplets/asan_nortti/arm64-windows-static.cmake b/cmake/vcpkg-triplets/asan_nortti/arm64-windows-static.cmake index 2d38883062bb1..3b028c4e40bcc 100644 --- a/cmake/vcpkg-triplets/asan_nortti/arm64-windows-static.cmake +++ b/cmake/vcpkg-triplets/asan_nortti/arm64-windows-static.cmake @@ -5,7 +5,7 @@ set(VCPKG_CRT_LINKAGE static) set(VCPKG_LIBRARY_LINKAGE static) set(VCPKG_C_FLAGS "/MP /DWIN32 /D_WINDOWS /DWINAPI_FAMILY=100 /DWINVER=0x0A00 /D_WIN32_WINNT=0x0A00 /DNTDDI_VERSION=0x0A000000 /fsanitize=address") set(VCPKG_CXX_FLAGS "/MP /DWIN32 /D_WINDOWS /DWINAPI_FAMILY=100 /DWINVER=0x0A00 /D_WIN32_WINNT=0x0A00 /DNTDDI_VERSION=0x0A000000 /fsanitize=address /Zc:__cplusplus /GR- /we4541") -list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error) +list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error -DCMAKE_CXX_STANDARD=17) if(PORT MATCHES "onnx") list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS "-DONNX_DISABLE_STATIC_REGISTRATION=ON" diff --git a/cmake/vcpkg-triplets/asan_nortti/arm64ec-windows-static-md.cmake b/cmake/vcpkg-triplets/asan_nortti/arm64ec-windows-static-md.cmake index 4cc7102bf3b1c..d2d4bda334e38 100644 --- a/cmake/vcpkg-triplets/asan_nortti/arm64ec-windows-static-md.cmake +++ b/cmake/vcpkg-triplets/asan_nortti/arm64ec-windows-static-md.cmake @@ -5,7 +5,7 @@ set(VCPKG_CRT_LINKAGE dynamic) set(VCPKG_LIBRARY_LINKAGE static) set(VCPKG_C_FLAGS "/MP /DWIN32 /D_WINDOWS /DWINAPI_FAMILY=100 /DWINVER=0x0A00 /D_WIN32_WINNT=0x0A00 /DNTDDI_VERSION=0x0A000000 /fsanitize=address") set(VCPKG_CXX_FLAGS "/MP /DWIN32 /D_WINDOWS /DWINAPI_FAMILY=100 /DWINVER=0x0A00 /D_WIN32_WINNT=0x0A00 /DNTDDI_VERSION=0x0A000000 /fsanitize=address /Zc:__cplusplus /GR- /we4541") -list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error) +list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error -DCMAKE_CXX_STANDARD=17) if(PORT MATCHES "onnx") list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS 
"-DONNX_DISABLE_STATIC_REGISTRATION=ON" diff --git a/cmake/vcpkg-triplets/asan_nortti/arm64ec-windows-static.cmake b/cmake/vcpkg-triplets/asan_nortti/arm64ec-windows-static.cmake index d84533c8de35c..8e986eb139862 100644 --- a/cmake/vcpkg-triplets/asan_nortti/arm64ec-windows-static.cmake +++ b/cmake/vcpkg-triplets/asan_nortti/arm64ec-windows-static.cmake @@ -5,7 +5,7 @@ set(VCPKG_CRT_LINKAGE static) set(VCPKG_LIBRARY_LINKAGE static) set(VCPKG_C_FLAGS "/MP /DWIN32 /D_WINDOWS /DWINAPI_FAMILY=100 /DWINVER=0x0A00 /D_WIN32_WINNT=0x0A00 /DNTDDI_VERSION=0x0A000000 /fsanitize=address") set(VCPKG_CXX_FLAGS "/MP /DWIN32 /D_WINDOWS /DWINAPI_FAMILY=100 /DWINVER=0x0A00 /D_WIN32_WINNT=0x0A00 /DNTDDI_VERSION=0x0A000000 /fsanitize=address /Zc:__cplusplus /GR- /we4541") -list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error) +list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error -DCMAKE_CXX_STANDARD=17) if(PORT MATCHES "onnx") list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS "-DONNX_DISABLE_STATIC_REGISTRATION=ON" diff --git a/cmake/vcpkg-triplets/asan_nortti/universal2-osx.cmake b/cmake/vcpkg-triplets/asan_nortti/universal2-osx.cmake index cacbfa751677d..6181e6d1c161b 100644 --- a/cmake/vcpkg-triplets/asan_nortti/universal2-osx.cmake +++ b/cmake/vcpkg-triplets/asan_nortti/universal2-osx.cmake @@ -3,13 +3,16 @@ set(VCPKG_TARGET_ARCHITECTURE x64) set(VCPKG_CRT_LINKAGE dynamic) set(VCPKG_LIBRARY_LINKAGE static) -set(VCPKG_C_FLAGS "-fsanitize=address -DEMSCRIPTEN_HAS_UNBOUND_TYPE_NAMES=0") -set(VCPKG_CXX_FLAGS "-fsanitize=address -DEMSCRIPTEN_HAS_UNBOUND_TYPE_NAMES=0 -fno-rtti") +set(VCPKG_C_FLAGS "-g -fsanitize=address -DEMSCRIPTEN_HAS_UNBOUND_TYPE_NAMES=0") +set(VCPKG_CXX_FLAGS "-g -fsanitize=address -DEMSCRIPTEN_HAS_UNBOUND_TYPE_NAMES=0 -fno-rtti") +set(VCPKG_C_FLAGS_RELEASE "-DNDEBUG -O3") +set(VCPKG_CXX_FLAGS_RELEASE "-DNDEBUG -O3") set(VCPKG_CMAKE_SYSTEM_NAME Darwin) set(VCPKG_OSX_ARCHITECTURES "x86_64;arm64") set(CMAKE_POSITION_INDEPENDENT_CODE ON) list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error -DBENCHMARK_ENABLE_WERROR=OFF) -set(VCPKG_LINKER_FLAGS "-fsanitize=address") +set(VCPKG_LINKER_FLAGS "-fsanitize=address -g") +list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS -DCMAKE_CXX_STANDARD=20) if(PORT MATCHES "onnx") list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS "-DONNX_DISABLE_STATIC_REGISTRATION=ON" diff --git a/cmake/vcpkg-triplets/asan_nortti/x64-linux.cmake b/cmake/vcpkg-triplets/asan_nortti/x64-linux.cmake index b53e668a64c09..d7103ff2508bf 100644 --- a/cmake/vcpkg-triplets/asan_nortti/x64-linux.cmake +++ b/cmake/vcpkg-triplets/asan_nortti/x64-linux.cmake @@ -3,12 +3,15 @@ set(VCPKG_TARGET_ARCHITECTURE x64) set(VCPKG_CRT_LINKAGE dynamic) set(VCPKG_LIBRARY_LINKAGE static) -set(VCPKG_C_FLAGS "-fsanitize=address -DEMSCRIPTEN_HAS_UNBOUND_TYPE_NAMES=0") -set(VCPKG_CXX_FLAGS "-fsanitize=address -DEMSCRIPTEN_HAS_UNBOUND_TYPE_NAMES=0 -fno-rtti") +set(VCPKG_C_FLAGS "-g -fsanitize=address -DEMSCRIPTEN_HAS_UNBOUND_TYPE_NAMES=0") +set(VCPKG_CXX_FLAGS "-g -fsanitize=address -DEMSCRIPTEN_HAS_UNBOUND_TYPE_NAMES=0 -fno-rtti") +set(VCPKG_C_FLAGS_RELEASE "-DNDEBUG -O3") +set(VCPKG_CXX_FLAGS_RELEASE "-DNDEBUG -O3") set(VCPKG_CMAKE_SYSTEM_NAME Linux) set(CMAKE_POSITION_INDEPENDENT_CODE ON) list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error -DBENCHMARK_ENABLE_WERROR=OFF) -set(VCPKG_LINKER_FLAGS "-fsanitize=address") +set(VCPKG_LINKER_FLAGS "-fsanitize=address -g") +list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS -DCMAKE_CXX_STANDARD=17) if(PORT MATCHES "onnx") 
list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS "-DONNX_DISABLE_STATIC_REGISTRATION=ON" diff --git a/cmake/vcpkg-triplets/asan_nortti/x64-osx.cmake b/cmake/vcpkg-triplets/asan_nortti/x64-osx.cmake index 9f4adb513edd4..191dfb3d35d10 100644 --- a/cmake/vcpkg-triplets/asan_nortti/x64-osx.cmake +++ b/cmake/vcpkg-triplets/asan_nortti/x64-osx.cmake @@ -3,13 +3,16 @@ set(VCPKG_TARGET_ARCHITECTURE x64) set(VCPKG_CRT_LINKAGE dynamic) set(VCPKG_LIBRARY_LINKAGE static) -set(VCPKG_C_FLAGS "-fsanitize=address -DEMSCRIPTEN_HAS_UNBOUND_TYPE_NAMES=0") -set(VCPKG_CXX_FLAGS "-fsanitize=address -DEMSCRIPTEN_HAS_UNBOUND_TYPE_NAMES=0 -fno-rtti") +set(VCPKG_C_FLAGS "-g -fsanitize=address -DEMSCRIPTEN_HAS_UNBOUND_TYPE_NAMES=0") +set(VCPKG_CXX_FLAGS "-g -fsanitize=address -DEMSCRIPTEN_HAS_UNBOUND_TYPE_NAMES=0 -fno-rtti") +set(VCPKG_C_FLAGS_RELEASE "-DNDEBUG -O3") +set(VCPKG_CXX_FLAGS_RELEASE "-DNDEBUG -O3") set(VCPKG_CMAKE_SYSTEM_NAME Darwin) set(VCPKG_OSX_ARCHITECTURES "x86_64") set(CMAKE_POSITION_INDEPENDENT_CODE ON) list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error -DBENCHMARK_ENABLE_WERROR=OFF) -set(VCPKG_LINKER_FLAGS "-fsanitize=address") +set(VCPKG_LINKER_FLAGS "-fsanitize=address -g") +list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS -DCMAKE_CXX_STANDARD=20) if(PORT MATCHES "onnx") list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS "-DONNX_DISABLE_STATIC_REGISTRATION=ON" diff --git a/cmake/vcpkg-triplets/asan_nortti/x64-windows-static-md.cmake b/cmake/vcpkg-triplets/asan_nortti/x64-windows-static-md.cmake index 2812ed9419e43..ae3f00b851145 100644 --- a/cmake/vcpkg-triplets/asan_nortti/x64-windows-static-md.cmake +++ b/cmake/vcpkg-triplets/asan_nortti/x64-windows-static-md.cmake @@ -5,7 +5,7 @@ set(VCPKG_CRT_LINKAGE dynamic) set(VCPKG_LIBRARY_LINKAGE static) set(VCPKG_C_FLAGS "/MP /DWIN32 /D_WINDOWS /DWINAPI_FAMILY=100 /DWINVER=0x0A00 /D_WIN32_WINNT=0x0A00 /DNTDDI_VERSION=0x0A000000 /fsanitize=address") set(VCPKG_CXX_FLAGS "/MP /DWIN32 /D_WINDOWS /DWINAPI_FAMILY=100 /DWINVER=0x0A00 /D_WIN32_WINNT=0x0A00 /DNTDDI_VERSION=0x0A000000 /fsanitize=address /Zc:__cplusplus /GR- /we4541") -list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error) +list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error -DCMAKE_CXX_STANDARD=17) if(PORT MATCHES "onnx") list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS "-DONNX_DISABLE_STATIC_REGISTRATION=ON" diff --git a/cmake/vcpkg-triplets/asan_nortti/x64-windows-static.cmake b/cmake/vcpkg-triplets/asan_nortti/x64-windows-static.cmake index ccdb919b3e3ee..d64f20d3ce7f6 100644 --- a/cmake/vcpkg-triplets/asan_nortti/x64-windows-static.cmake +++ b/cmake/vcpkg-triplets/asan_nortti/x64-windows-static.cmake @@ -5,7 +5,7 @@ set(VCPKG_CRT_LINKAGE static) set(VCPKG_LIBRARY_LINKAGE static) set(VCPKG_C_FLAGS "/MP /DWIN32 /D_WINDOWS /DWINAPI_FAMILY=100 /DWINVER=0x0A00 /D_WIN32_WINNT=0x0A00 /DNTDDI_VERSION=0x0A000000 /fsanitize=address") set(VCPKG_CXX_FLAGS "/MP /DWIN32 /D_WINDOWS /DWINAPI_FAMILY=100 /DWINVER=0x0A00 /D_WIN32_WINNT=0x0A00 /DNTDDI_VERSION=0x0A000000 /fsanitize=address /Zc:__cplusplus /GR- /we4541") -list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error) +list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error -DCMAKE_CXX_STANDARD=17) if(PORT MATCHES "onnx") list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS "-DONNX_DISABLE_STATIC_REGISTRATION=ON" diff --git a/cmake/vcpkg-triplets/asan_nortti/x86-windows-static-md.cmake b/cmake/vcpkg-triplets/asan_nortti/x86-windows-static-md.cmake index 7a6b45666a924..24ddfa43c0f59 100644 --- 
a/cmake/vcpkg-triplets/asan_nortti/x86-windows-static-md.cmake +++ b/cmake/vcpkg-triplets/asan_nortti/x86-windows-static-md.cmake @@ -5,7 +5,7 @@ set(VCPKG_CRT_LINKAGE dynamic) set(VCPKG_LIBRARY_LINKAGE static) set(VCPKG_C_FLAGS "/MP /DWIN32 /D_WINDOWS /DWINAPI_FAMILY=100 /DWINVER=0x0A00 /D_WIN32_WINNT=0x0A00 /DNTDDI_VERSION=0x0A000000 /fsanitize=address") set(VCPKG_CXX_FLAGS "/MP /DWIN32 /D_WINDOWS /DWINAPI_FAMILY=100 /DWINVER=0x0A00 /D_WIN32_WINNT=0x0A00 /DNTDDI_VERSION=0x0A000000 /fsanitize=address /Zc:__cplusplus /GR- /we4541") -list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error) +list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error -DCMAKE_CXX_STANDARD=17) if(PORT MATCHES "onnx") list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS "-DONNX_DISABLE_STATIC_REGISTRATION=ON" diff --git a/cmake/vcpkg-triplets/asan_nortti/x86-windows-static.cmake b/cmake/vcpkg-triplets/asan_nortti/x86-windows-static.cmake index 96b2a2ad749b8..53fcb44313c26 100644 --- a/cmake/vcpkg-triplets/asan_nortti/x86-windows-static.cmake +++ b/cmake/vcpkg-triplets/asan_nortti/x86-windows-static.cmake @@ -5,7 +5,7 @@ set(VCPKG_CRT_LINKAGE static) set(VCPKG_LIBRARY_LINKAGE static) set(VCPKG_C_FLAGS "/MP /DWIN32 /D_WINDOWS /DWINAPI_FAMILY=100 /DWINVER=0x0A00 /D_WIN32_WINNT=0x0A00 /DNTDDI_VERSION=0x0A000000 /fsanitize=address") set(VCPKG_CXX_FLAGS "/MP /DWIN32 /D_WINDOWS /DWINAPI_FAMILY=100 /DWINVER=0x0A00 /D_WIN32_WINNT=0x0A00 /DNTDDI_VERSION=0x0A000000 /fsanitize=address /Zc:__cplusplus /GR- /we4541") -list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error) +list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error -DCMAKE_CXX_STANDARD=17) if(PORT MATCHES "onnx") list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS "-DONNX_DISABLE_STATIC_REGISTRATION=ON" diff --git a/cmake/vcpkg-triplets/binskim/arm64-linux.cmake b/cmake/vcpkg-triplets/binskim/arm64-linux.cmake index 4b738553e0fbc..8a3cf645d7f5f 100644 --- a/cmake/vcpkg-triplets/binskim/arm64-linux.cmake +++ b/cmake/vcpkg-triplets/binskim/arm64-linux.cmake @@ -3,12 +3,15 @@ set(VCPKG_TARGET_ARCHITECTURE arm64) set(VCPKG_CRT_LINKAGE dynamic) set(VCPKG_LIBRARY_LINKAGE static) -set(VCPKG_C_FLAGS "-Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong") -set(VCPKG_CXX_FLAGS "-Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong") +set(VCPKG_C_FLAGS "-g") +set(VCPKG_CXX_FLAGS "-g") +set(VCPKG_C_FLAGS_RELEASE "-DNDEBUG -O3 -Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong") +set(VCPKG_CXX_FLAGS_RELEASE "-DNDEBUG -O3 -Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong") set(VCPKG_CMAKE_SYSTEM_NAME Linux) set(CMAKE_POSITION_INDEPENDENT_CODE ON) list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error -DBENCHMARK_ENABLE_WERROR=OFF) -set(VCPKG_LINKER_FLAGS "-Wl,-Bsymbolic-functions -Wl,-z,relro -Wl,-z,now -Wl,-z,noexecstack") +set(VCPKG_LINKER_FLAGS "-Wl,-Bsymbolic-functions -Wl,-z,relro -Wl,-z,now -Wl,-z,noexecstack -g") +list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS -DCMAKE_CXX_STANDARD=17) if(PORT MATCHES "onnx") list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS "-DONNX_DISABLE_STATIC_REGISTRATION=ON" diff --git a/cmake/vcpkg-triplets/binskim/arm64-osx.cmake b/cmake/vcpkg-triplets/binskim/arm64-osx.cmake index 4b6999874b111..9892a3eac80e8 100644 --- a/cmake/vcpkg-triplets/binskim/arm64-osx.cmake +++ b/cmake/vcpkg-triplets/binskim/arm64-osx.cmake @@ -3,12 +3,16 @@ set(VCPKG_TARGET_ARCHITECTURE arm64) set(VCPKG_CRT_LINKAGE dynamic) 
set(VCPKG_LIBRARY_LINKAGE static) -set(VCPKG_C_FLAGS "-Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong") -set(VCPKG_CXX_FLAGS "-Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong") +set(VCPKG_C_FLAGS "-g") +set(VCPKG_CXX_FLAGS "-g") +set(VCPKG_C_FLAGS_RELEASE "-DNDEBUG -O3 -Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong") +set(VCPKG_CXX_FLAGS_RELEASE "-DNDEBUG -O3 -Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong") set(VCPKG_CMAKE_SYSTEM_NAME Darwin) set(VCPKG_OSX_ARCHITECTURES "arm64") set(CMAKE_POSITION_INDEPENDENT_CODE ON) list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error -DBENCHMARK_ENABLE_WERROR=OFF) +set(VCPKG_LINKER_FLAGS "-g") +list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS -DCMAKE_CXX_STANDARD=20) if(PORT MATCHES "onnx") list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS "-DONNX_DISABLE_STATIC_REGISTRATION=ON" diff --git a/cmake/vcpkg-triplets/binskim/arm64-windows-static-md.cmake b/cmake/vcpkg-triplets/binskim/arm64-windows-static-md.cmake index 89dfae4bcbf26..3818356b5c0ce 100644 --- a/cmake/vcpkg-triplets/binskim/arm64-windows-static-md.cmake +++ b/cmake/vcpkg-triplets/binskim/arm64-windows-static-md.cmake @@ -5,7 +5,7 @@ set(VCPKG_CRT_LINKAGE dynamic) set(VCPKG_LIBRARY_LINKAGE static) set(VCPKG_C_FLAGS "/MP /DWIN32 /D_WINDOWS /DWINAPI_FAMILY=100 /DWINVER=0x0A00 /D_WIN32_WINNT=0x0A00 /DNTDDI_VERSION=0x0A000000 /guard:cf /Qspectre") set(VCPKG_CXX_FLAGS "/MP /DWIN32 /D_WINDOWS /DWINAPI_FAMILY=100 /DWINVER=0x0A00 /D_WIN32_WINNT=0x0A00 /DNTDDI_VERSION=0x0A000000 /guard:cf /Qspectre /Zc:__cplusplus") -list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error) +list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error -DCMAKE_CXX_STANDARD=17) set(VCPKG_LINKER_FLAGS "/profile /DYNAMICBASE") if(PORT MATCHES "onnx") list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS diff --git a/cmake/vcpkg-triplets/binskim/arm64-windows-static.cmake b/cmake/vcpkg-triplets/binskim/arm64-windows-static.cmake index 28ef65c4d1227..ab38e9f9a9f18 100644 --- a/cmake/vcpkg-triplets/binskim/arm64-windows-static.cmake +++ b/cmake/vcpkg-triplets/binskim/arm64-windows-static.cmake @@ -5,7 +5,7 @@ set(VCPKG_CRT_LINKAGE static) set(VCPKG_LIBRARY_LINKAGE static) set(VCPKG_C_FLAGS "/MP /DWIN32 /D_WINDOWS /DWINAPI_FAMILY=100 /DWINVER=0x0A00 /D_WIN32_WINNT=0x0A00 /DNTDDI_VERSION=0x0A000000 /guard:cf /Qspectre") set(VCPKG_CXX_FLAGS "/MP /DWIN32 /D_WINDOWS /DWINAPI_FAMILY=100 /DWINVER=0x0A00 /D_WIN32_WINNT=0x0A00 /DNTDDI_VERSION=0x0A000000 /guard:cf /Qspectre /Zc:__cplusplus") -list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error) +list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error -DCMAKE_CXX_STANDARD=17) set(VCPKG_LINKER_FLAGS "/profile /DYNAMICBASE") if(PORT MATCHES "onnx") list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS diff --git a/cmake/vcpkg-triplets/binskim/arm64ec-windows-static-md.cmake b/cmake/vcpkg-triplets/binskim/arm64ec-windows-static-md.cmake index 0c087aa1b59f7..6937aea847a8a 100644 --- a/cmake/vcpkg-triplets/binskim/arm64ec-windows-static-md.cmake +++ b/cmake/vcpkg-triplets/binskim/arm64ec-windows-static-md.cmake @@ -5,7 +5,7 @@ set(VCPKG_CRT_LINKAGE dynamic) set(VCPKG_LIBRARY_LINKAGE static) set(VCPKG_C_FLAGS "/MP /DWIN32 /D_WINDOWS /DWINAPI_FAMILY=100 /DWINVER=0x0A00 /D_WIN32_WINNT=0x0A00 /DNTDDI_VERSION=0x0A000000 /guard:cf /Qspectre") set(VCPKG_CXX_FLAGS "/MP /DWIN32 /D_WINDOWS /DWINAPI_FAMILY=100 /DWINVER=0x0A00 /D_WIN32_WINNT=0x0A00 
/DNTDDI_VERSION=0x0A000000 /guard:cf /Qspectre /Zc:__cplusplus") -list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error) +list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error -DCMAKE_CXX_STANDARD=17) set(VCPKG_LINKER_FLAGS "/profile /DYNAMICBASE") if(PORT MATCHES "onnx") list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS diff --git a/cmake/vcpkg-triplets/binskim/arm64ec-windows-static.cmake b/cmake/vcpkg-triplets/binskim/arm64ec-windows-static.cmake index 8c7de3b8a97f9..84c0531033699 100644 --- a/cmake/vcpkg-triplets/binskim/arm64ec-windows-static.cmake +++ b/cmake/vcpkg-triplets/binskim/arm64ec-windows-static.cmake @@ -5,7 +5,7 @@ set(VCPKG_CRT_LINKAGE static) set(VCPKG_LIBRARY_LINKAGE static) set(VCPKG_C_FLAGS "/MP /DWIN32 /D_WINDOWS /DWINAPI_FAMILY=100 /DWINVER=0x0A00 /D_WIN32_WINNT=0x0A00 /DNTDDI_VERSION=0x0A000000 /guard:cf /Qspectre") set(VCPKG_CXX_FLAGS "/MP /DWIN32 /D_WINDOWS /DWINAPI_FAMILY=100 /DWINVER=0x0A00 /D_WIN32_WINNT=0x0A00 /DNTDDI_VERSION=0x0A000000 /guard:cf /Qspectre /Zc:__cplusplus") -list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error) +list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error -DCMAKE_CXX_STANDARD=17) set(VCPKG_LINKER_FLAGS "/profile /DYNAMICBASE") if(PORT MATCHES "onnx") list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS diff --git a/cmake/vcpkg-triplets/binskim/universal2-osx.cmake b/cmake/vcpkg-triplets/binskim/universal2-osx.cmake index 60826f1ede770..da4c6abb39000 100644 --- a/cmake/vcpkg-triplets/binskim/universal2-osx.cmake +++ b/cmake/vcpkg-triplets/binskim/universal2-osx.cmake @@ -3,12 +3,16 @@ set(VCPKG_TARGET_ARCHITECTURE x64) set(VCPKG_CRT_LINKAGE dynamic) set(VCPKG_LIBRARY_LINKAGE static) -set(VCPKG_C_FLAGS "-Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong") -set(VCPKG_CXX_FLAGS "-Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong") +set(VCPKG_C_FLAGS "-g") +set(VCPKG_CXX_FLAGS "-g") +set(VCPKG_C_FLAGS_RELEASE "-DNDEBUG -O3 -Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong") +set(VCPKG_CXX_FLAGS_RELEASE "-DNDEBUG -O3 -Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong") set(VCPKG_CMAKE_SYSTEM_NAME Darwin) set(VCPKG_OSX_ARCHITECTURES "x86_64;arm64") set(CMAKE_POSITION_INDEPENDENT_CODE ON) list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error -DBENCHMARK_ENABLE_WERROR=OFF) +set(VCPKG_LINKER_FLAGS "-g") +list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS -DCMAKE_CXX_STANDARD=20) if(PORT MATCHES "onnx") list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS "-DONNX_DISABLE_STATIC_REGISTRATION=ON" diff --git a/cmake/vcpkg-triplets/binskim/x64-linux.cmake b/cmake/vcpkg-triplets/binskim/x64-linux.cmake index 8d7aeb2342e26..e3d4d34326409 100644 --- a/cmake/vcpkg-triplets/binskim/x64-linux.cmake +++ b/cmake/vcpkg-triplets/binskim/x64-linux.cmake @@ -3,12 +3,15 @@ set(VCPKG_TARGET_ARCHITECTURE x64) set(VCPKG_CRT_LINKAGE dynamic) set(VCPKG_LIBRARY_LINKAGE static) -set(VCPKG_C_FLAGS "-Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -fstack-clash-protection -fcf-protection") -set(VCPKG_CXX_FLAGS "-Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -fstack-clash-protection -fcf-protection") +set(VCPKG_C_FLAGS "-g") +set(VCPKG_CXX_FLAGS "-g") +set(VCPKG_C_FLAGS_RELEASE "-DNDEBUG -O3 -Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -fstack-clash-protection -fcf-protection") +set(VCPKG_CXX_FLAGS_RELEASE "-DNDEBUG -O3 
-Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -fstack-clash-protection -fcf-protection") set(VCPKG_CMAKE_SYSTEM_NAME Linux) set(CMAKE_POSITION_INDEPENDENT_CODE ON) list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error -DBENCHMARK_ENABLE_WERROR=OFF) -set(VCPKG_LINKER_FLAGS "-Wl,-Bsymbolic-functions -Wl,-z,relro -Wl,-z,now -Wl,-z,noexecstack") +set(VCPKG_LINKER_FLAGS "-Wl,-Bsymbolic-functions -Wl,-z,relro -Wl,-z,now -Wl,-z,noexecstack -g") +list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS -DCMAKE_CXX_STANDARD=17) if(PORT MATCHES "onnx") list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS "-DONNX_DISABLE_STATIC_REGISTRATION=ON" diff --git a/cmake/vcpkg-triplets/binskim/x64-osx.cmake b/cmake/vcpkg-triplets/binskim/x64-osx.cmake index e391ab9eaee6d..426a35e33f747 100644 --- a/cmake/vcpkg-triplets/binskim/x64-osx.cmake +++ b/cmake/vcpkg-triplets/binskim/x64-osx.cmake @@ -3,12 +3,16 @@ set(VCPKG_TARGET_ARCHITECTURE x64) set(VCPKG_CRT_LINKAGE dynamic) set(VCPKG_LIBRARY_LINKAGE static) -set(VCPKG_C_FLAGS "-Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -fstack-clash-protection -fcf-protection") -set(VCPKG_CXX_FLAGS "-Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -fstack-clash-protection -fcf-protection") +set(VCPKG_C_FLAGS "-g") +set(VCPKG_CXX_FLAGS "-g") +set(VCPKG_C_FLAGS_RELEASE "-DNDEBUG -O3 -Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -fstack-clash-protection -fcf-protection") +set(VCPKG_CXX_FLAGS_RELEASE "-DNDEBUG -O3 -Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -fstack-clash-protection -fcf-protection") set(VCPKG_CMAKE_SYSTEM_NAME Darwin) set(VCPKG_OSX_ARCHITECTURES "x86_64") set(CMAKE_POSITION_INDEPENDENT_CODE ON) list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error -DBENCHMARK_ENABLE_WERROR=OFF) +set(VCPKG_LINKER_FLAGS "-g") +list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS -DCMAKE_CXX_STANDARD=20) if(PORT MATCHES "onnx") list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS "-DONNX_DISABLE_STATIC_REGISTRATION=ON" diff --git a/cmake/vcpkg-triplets/binskim/x64-windows-static-md.cmake b/cmake/vcpkg-triplets/binskim/x64-windows-static-md.cmake index ef67223cd0cd3..0f600d7931076 100644 --- a/cmake/vcpkg-triplets/binskim/x64-windows-static-md.cmake +++ b/cmake/vcpkg-triplets/binskim/x64-windows-static-md.cmake @@ -5,7 +5,7 @@ set(VCPKG_CRT_LINKAGE dynamic) set(VCPKG_LIBRARY_LINKAGE static) set(VCPKG_C_FLAGS "/MP /DWIN32 /D_WINDOWS /DWINAPI_FAMILY=100 /DWINVER=0x0A00 /D_WIN32_WINNT=0x0A00 /DNTDDI_VERSION=0x0A000000 /guard:cf /Qspectre") set(VCPKG_CXX_FLAGS "/MP /DWIN32 /D_WINDOWS /DWINAPI_FAMILY=100 /DWINVER=0x0A00 /D_WIN32_WINNT=0x0A00 /DNTDDI_VERSION=0x0A000000 /guard:cf /Qspectre /Zc:__cplusplus") -list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error) +list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error -DCMAKE_CXX_STANDARD=17) set(VCPKG_LINKER_FLAGS "/profile /DYNAMICBASE") if(PORT MATCHES "onnx") list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS diff --git a/cmake/vcpkg-triplets/binskim/x64-windows-static.cmake b/cmake/vcpkg-triplets/binskim/x64-windows-static.cmake index 62948a156c911..17d41775c9d06 100644 --- a/cmake/vcpkg-triplets/binskim/x64-windows-static.cmake +++ b/cmake/vcpkg-triplets/binskim/x64-windows-static.cmake @@ -5,7 +5,7 @@ set(VCPKG_CRT_LINKAGE static) set(VCPKG_LIBRARY_LINKAGE static) set(VCPKG_C_FLAGS "/MP /DWIN32 /D_WINDOWS /DWINAPI_FAMILY=100 /DWINVER=0x0A00 /D_WIN32_WINNT=0x0A00 
/DNTDDI_VERSION=0x0A000000 /guard:cf /Qspectre") set(VCPKG_CXX_FLAGS "/MP /DWIN32 /D_WINDOWS /DWINAPI_FAMILY=100 /DWINVER=0x0A00 /D_WIN32_WINNT=0x0A00 /DNTDDI_VERSION=0x0A000000 /guard:cf /Qspectre /Zc:__cplusplus") -list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error) +list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error -DCMAKE_CXX_STANDARD=17) set(VCPKG_LINKER_FLAGS "/profile /DYNAMICBASE") if(PORT MATCHES "onnx") list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS diff --git a/cmake/vcpkg-triplets/binskim/x86-windows-static-md.cmake b/cmake/vcpkg-triplets/binskim/x86-windows-static-md.cmake index 8ac022c7eee4c..cb981c264a2f1 100644 --- a/cmake/vcpkg-triplets/binskim/x86-windows-static-md.cmake +++ b/cmake/vcpkg-triplets/binskim/x86-windows-static-md.cmake @@ -5,7 +5,7 @@ set(VCPKG_CRT_LINKAGE dynamic) set(VCPKG_LIBRARY_LINKAGE static) set(VCPKG_C_FLAGS "/MP /DWIN32 /D_WINDOWS /DWINAPI_FAMILY=100 /DWINVER=0x0A00 /D_WIN32_WINNT=0x0A00 /DNTDDI_VERSION=0x0A000000 /guard:cf /Qspectre") set(VCPKG_CXX_FLAGS "/MP /DWIN32 /D_WINDOWS /DWINAPI_FAMILY=100 /DWINVER=0x0A00 /D_WIN32_WINNT=0x0A00 /DNTDDI_VERSION=0x0A000000 /guard:cf /Qspectre /Zc:__cplusplus") -list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error) +list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error -DCMAKE_CXX_STANDARD=17) set(VCPKG_LINKER_FLAGS "/profile /DYNAMICBASE") if(PORT MATCHES "onnx") list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS diff --git a/cmake/vcpkg-triplets/binskim/x86-windows-static.cmake b/cmake/vcpkg-triplets/binskim/x86-windows-static.cmake index 8fd2d29dc3d99..53342263d5ada 100644 --- a/cmake/vcpkg-triplets/binskim/x86-windows-static.cmake +++ b/cmake/vcpkg-triplets/binskim/x86-windows-static.cmake @@ -5,7 +5,7 @@ set(VCPKG_CRT_LINKAGE static) set(VCPKG_LIBRARY_LINKAGE static) set(VCPKG_C_FLAGS "/MP /DWIN32 /D_WINDOWS /DWINAPI_FAMILY=100 /DWINVER=0x0A00 /D_WIN32_WINNT=0x0A00 /DNTDDI_VERSION=0x0A000000 /guard:cf /Qspectre") set(VCPKG_CXX_FLAGS "/MP /DWIN32 /D_WINDOWS /DWINAPI_FAMILY=100 /DWINVER=0x0A00 /D_WIN32_WINNT=0x0A00 /DNTDDI_VERSION=0x0A000000 /guard:cf /Qspectre /Zc:__cplusplus") -list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error) +list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error -DCMAKE_CXX_STANDARD=17) set(VCPKG_LINKER_FLAGS "/profile /DYNAMICBASE") if(PORT MATCHES "onnx") list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS diff --git a/cmake/vcpkg-triplets/binskim_nortti/arm64-linux.cmake b/cmake/vcpkg-triplets/binskim_nortti/arm64-linux.cmake index c9787f460b78d..203c85fa3a59e 100644 --- a/cmake/vcpkg-triplets/binskim_nortti/arm64-linux.cmake +++ b/cmake/vcpkg-triplets/binskim_nortti/arm64-linux.cmake @@ -3,12 +3,15 @@ set(VCPKG_TARGET_ARCHITECTURE arm64) set(VCPKG_CRT_LINKAGE dynamic) set(VCPKG_LIBRARY_LINKAGE static) -set(VCPKG_C_FLAGS "-Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -DEMSCRIPTEN_HAS_UNBOUND_TYPE_NAMES=0") -set(VCPKG_CXX_FLAGS "-Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -DEMSCRIPTEN_HAS_UNBOUND_TYPE_NAMES=0 -fno-rtti") +set(VCPKG_C_FLAGS "-g -DEMSCRIPTEN_HAS_UNBOUND_TYPE_NAMES=0") +set(VCPKG_CXX_FLAGS "-g -DEMSCRIPTEN_HAS_UNBOUND_TYPE_NAMES=0 -fno-rtti") +set(VCPKG_C_FLAGS_RELEASE "-DNDEBUG -O3 -Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong") +set(VCPKG_CXX_FLAGS_RELEASE "-DNDEBUG -O3 -Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong") set(VCPKG_CMAKE_SYSTEM_NAME Linux) 
set(CMAKE_POSITION_INDEPENDENT_CODE ON) list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error -DBENCHMARK_ENABLE_WERROR=OFF) -set(VCPKG_LINKER_FLAGS "-Wl,-Bsymbolic-functions -Wl,-z,relro -Wl,-z,now -Wl,-z,noexecstack") +set(VCPKG_LINKER_FLAGS "-Wl,-Bsymbolic-functions -Wl,-z,relro -Wl,-z,now -Wl,-z,noexecstack -g") +list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS -DCMAKE_CXX_STANDARD=17) if(PORT MATCHES "onnx") list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS "-DONNX_DISABLE_STATIC_REGISTRATION=ON" diff --git a/cmake/vcpkg-triplets/binskim_nortti/arm64-osx.cmake b/cmake/vcpkg-triplets/binskim_nortti/arm64-osx.cmake index f5866d6863cb7..c57a2401e4c0f 100644 --- a/cmake/vcpkg-triplets/binskim_nortti/arm64-osx.cmake +++ b/cmake/vcpkg-triplets/binskim_nortti/arm64-osx.cmake @@ -3,12 +3,16 @@ set(VCPKG_TARGET_ARCHITECTURE arm64) set(VCPKG_CRT_LINKAGE dynamic) set(VCPKG_LIBRARY_LINKAGE static) -set(VCPKG_C_FLAGS "-Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -DEMSCRIPTEN_HAS_UNBOUND_TYPE_NAMES=0") -set(VCPKG_CXX_FLAGS "-Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -DEMSCRIPTEN_HAS_UNBOUND_TYPE_NAMES=0 -fno-rtti") +set(VCPKG_C_FLAGS "-g -DEMSCRIPTEN_HAS_UNBOUND_TYPE_NAMES=0") +set(VCPKG_CXX_FLAGS "-g -DEMSCRIPTEN_HAS_UNBOUND_TYPE_NAMES=0 -fno-rtti") +set(VCPKG_C_FLAGS_RELEASE "-DNDEBUG -O3 -Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong") +set(VCPKG_CXX_FLAGS_RELEASE "-DNDEBUG -O3 -Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong") set(VCPKG_CMAKE_SYSTEM_NAME Darwin) set(VCPKG_OSX_ARCHITECTURES "arm64") set(CMAKE_POSITION_INDEPENDENT_CODE ON) list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error -DBENCHMARK_ENABLE_WERROR=OFF) +set(VCPKG_LINKER_FLAGS "-g") +list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS -DCMAKE_CXX_STANDARD=20) if(PORT MATCHES "onnx") list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS "-DONNX_DISABLE_STATIC_REGISTRATION=ON" diff --git a/cmake/vcpkg-triplets/binskim_nortti/arm64-windows-static-md.cmake b/cmake/vcpkg-triplets/binskim_nortti/arm64-windows-static-md.cmake index 927b110c98d45..9963cfb66f4b1 100644 --- a/cmake/vcpkg-triplets/binskim_nortti/arm64-windows-static-md.cmake +++ b/cmake/vcpkg-triplets/binskim_nortti/arm64-windows-static-md.cmake @@ -5,7 +5,7 @@ set(VCPKG_CRT_LINKAGE dynamic) set(VCPKG_LIBRARY_LINKAGE static) set(VCPKG_C_FLAGS "/MP /DWIN32 /D_WINDOWS /DWINAPI_FAMILY=100 /DWINVER=0x0A00 /D_WIN32_WINNT=0x0A00 /DNTDDI_VERSION=0x0A000000 /guard:cf /Qspectre") set(VCPKG_CXX_FLAGS "/MP /DWIN32 /D_WINDOWS /DWINAPI_FAMILY=100 /DWINVER=0x0A00 /D_WIN32_WINNT=0x0A00 /DNTDDI_VERSION=0x0A000000 /guard:cf /Qspectre /Zc:__cplusplus /GR- /we4541") -list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error) +list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error -DCMAKE_CXX_STANDARD=17) set(VCPKG_LINKER_FLAGS "/profile /DYNAMICBASE") if(PORT MATCHES "onnx") list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS diff --git a/cmake/vcpkg-triplets/binskim_nortti/arm64-windows-static.cmake b/cmake/vcpkg-triplets/binskim_nortti/arm64-windows-static.cmake index b0419c9a0d0e0..0f4948ff076f1 100644 --- a/cmake/vcpkg-triplets/binskim_nortti/arm64-windows-static.cmake +++ b/cmake/vcpkg-triplets/binskim_nortti/arm64-windows-static.cmake @@ -5,7 +5,7 @@ set(VCPKG_CRT_LINKAGE static) set(VCPKG_LIBRARY_LINKAGE static) set(VCPKG_C_FLAGS "/MP /DWIN32 /D_WINDOWS /DWINAPI_FAMILY=100 /DWINVER=0x0A00 /D_WIN32_WINNT=0x0A00 /DNTDDI_VERSION=0x0A000000 /guard:cf 
/Qspectre") set(VCPKG_CXX_FLAGS "/MP /DWIN32 /D_WINDOWS /DWINAPI_FAMILY=100 /DWINVER=0x0A00 /D_WIN32_WINNT=0x0A00 /DNTDDI_VERSION=0x0A000000 /guard:cf /Qspectre /Zc:__cplusplus /GR- /we4541") -list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error) +list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error -DCMAKE_CXX_STANDARD=17) set(VCPKG_LINKER_FLAGS "/profile /DYNAMICBASE") if(PORT MATCHES "onnx") list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS diff --git a/cmake/vcpkg-triplets/binskim_nortti/arm64ec-windows-static-md.cmake b/cmake/vcpkg-triplets/binskim_nortti/arm64ec-windows-static-md.cmake index aa8b7a5f0e96b..6a5c8b9f1058a 100644 --- a/cmake/vcpkg-triplets/binskim_nortti/arm64ec-windows-static-md.cmake +++ b/cmake/vcpkg-triplets/binskim_nortti/arm64ec-windows-static-md.cmake @@ -5,7 +5,7 @@ set(VCPKG_CRT_LINKAGE dynamic) set(VCPKG_LIBRARY_LINKAGE static) set(VCPKG_C_FLAGS "/MP /DWIN32 /D_WINDOWS /DWINAPI_FAMILY=100 /DWINVER=0x0A00 /D_WIN32_WINNT=0x0A00 /DNTDDI_VERSION=0x0A000000 /guard:cf /Qspectre") set(VCPKG_CXX_FLAGS "/MP /DWIN32 /D_WINDOWS /DWINAPI_FAMILY=100 /DWINVER=0x0A00 /D_WIN32_WINNT=0x0A00 /DNTDDI_VERSION=0x0A000000 /guard:cf /Qspectre /Zc:__cplusplus /GR- /we4541") -list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error) +list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error -DCMAKE_CXX_STANDARD=17) set(VCPKG_LINKER_FLAGS "/profile /DYNAMICBASE") if(PORT MATCHES "onnx") list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS diff --git a/cmake/vcpkg-triplets/binskim_nortti/arm64ec-windows-static.cmake b/cmake/vcpkg-triplets/binskim_nortti/arm64ec-windows-static.cmake index 96da5d9b1372f..668d4fb4dc420 100644 --- a/cmake/vcpkg-triplets/binskim_nortti/arm64ec-windows-static.cmake +++ b/cmake/vcpkg-triplets/binskim_nortti/arm64ec-windows-static.cmake @@ -5,7 +5,7 @@ set(VCPKG_CRT_LINKAGE static) set(VCPKG_LIBRARY_LINKAGE static) set(VCPKG_C_FLAGS "/MP /DWIN32 /D_WINDOWS /DWINAPI_FAMILY=100 /DWINVER=0x0A00 /D_WIN32_WINNT=0x0A00 /DNTDDI_VERSION=0x0A000000 /guard:cf /Qspectre") set(VCPKG_CXX_FLAGS "/MP /DWIN32 /D_WINDOWS /DWINAPI_FAMILY=100 /DWINVER=0x0A00 /D_WIN32_WINNT=0x0A00 /DNTDDI_VERSION=0x0A000000 /guard:cf /Qspectre /Zc:__cplusplus /GR- /we4541") -list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error) +list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error -DCMAKE_CXX_STANDARD=17) set(VCPKG_LINKER_FLAGS "/profile /DYNAMICBASE") if(PORT MATCHES "onnx") list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS diff --git a/cmake/vcpkg-triplets/binskim_nortti/universal2-osx.cmake b/cmake/vcpkg-triplets/binskim_nortti/universal2-osx.cmake index f4ef6f0c659d8..1956daf30e6d9 100644 --- a/cmake/vcpkg-triplets/binskim_nortti/universal2-osx.cmake +++ b/cmake/vcpkg-triplets/binskim_nortti/universal2-osx.cmake @@ -3,12 +3,16 @@ set(VCPKG_TARGET_ARCHITECTURE x64) set(VCPKG_CRT_LINKAGE dynamic) set(VCPKG_LIBRARY_LINKAGE static) -set(VCPKG_C_FLAGS "-Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -DEMSCRIPTEN_HAS_UNBOUND_TYPE_NAMES=0") -set(VCPKG_CXX_FLAGS "-Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -DEMSCRIPTEN_HAS_UNBOUND_TYPE_NAMES=0 -fno-rtti") +set(VCPKG_C_FLAGS "-g -DEMSCRIPTEN_HAS_UNBOUND_TYPE_NAMES=0") +set(VCPKG_CXX_FLAGS "-g -DEMSCRIPTEN_HAS_UNBOUND_TYPE_NAMES=0 -fno-rtti") +set(VCPKG_C_FLAGS_RELEASE "-DNDEBUG -O3 -Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong") +set(VCPKG_CXX_FLAGS_RELEASE "-DNDEBUG -O3 
-Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong") set(VCPKG_CMAKE_SYSTEM_NAME Darwin) set(VCPKG_OSX_ARCHITECTURES "x86_64;arm64") set(CMAKE_POSITION_INDEPENDENT_CODE ON) list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error -DBENCHMARK_ENABLE_WERROR=OFF) +set(VCPKG_LINKER_FLAGS "-g") +list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS -DCMAKE_CXX_STANDARD=20) if(PORT MATCHES "onnx") list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS "-DONNX_DISABLE_STATIC_REGISTRATION=ON" diff --git a/cmake/vcpkg-triplets/binskim_nortti/x64-linux.cmake b/cmake/vcpkg-triplets/binskim_nortti/x64-linux.cmake index 8fe977fb86e56..da17e0073980f 100644 --- a/cmake/vcpkg-triplets/binskim_nortti/x64-linux.cmake +++ b/cmake/vcpkg-triplets/binskim_nortti/x64-linux.cmake @@ -3,12 +3,15 @@ set(VCPKG_TARGET_ARCHITECTURE x64) set(VCPKG_CRT_LINKAGE dynamic) set(VCPKG_LIBRARY_LINKAGE static) -set(VCPKG_C_FLAGS "-Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -fstack-clash-protection -fcf-protection -DEMSCRIPTEN_HAS_UNBOUND_TYPE_NAMES=0") -set(VCPKG_CXX_FLAGS "-Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -fstack-clash-protection -fcf-protection -DEMSCRIPTEN_HAS_UNBOUND_TYPE_NAMES=0 -fno-rtti") +set(VCPKG_C_FLAGS "-g -DEMSCRIPTEN_HAS_UNBOUND_TYPE_NAMES=0") +set(VCPKG_CXX_FLAGS "-g -DEMSCRIPTEN_HAS_UNBOUND_TYPE_NAMES=0 -fno-rtti") +set(VCPKG_C_FLAGS_RELEASE "-DNDEBUG -O3 -Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -fstack-clash-protection -fcf-protection") +set(VCPKG_CXX_FLAGS_RELEASE "-DNDEBUG -O3 -Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -fstack-clash-protection -fcf-protection") set(VCPKG_CMAKE_SYSTEM_NAME Linux) set(CMAKE_POSITION_INDEPENDENT_CODE ON) list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error -DBENCHMARK_ENABLE_WERROR=OFF) -set(VCPKG_LINKER_FLAGS "-Wl,-Bsymbolic-functions -Wl,-z,relro -Wl,-z,now -Wl,-z,noexecstack") +set(VCPKG_LINKER_FLAGS "-Wl,-Bsymbolic-functions -Wl,-z,relro -Wl,-z,now -Wl,-z,noexecstack -g") +list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS -DCMAKE_CXX_STANDARD=17) if(PORT MATCHES "onnx") list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS "-DONNX_DISABLE_STATIC_REGISTRATION=ON" diff --git a/cmake/vcpkg-triplets/binskim_nortti/x64-osx.cmake b/cmake/vcpkg-triplets/binskim_nortti/x64-osx.cmake index 196018d7cf442..c74e60bc7c206 100644 --- a/cmake/vcpkg-triplets/binskim_nortti/x64-osx.cmake +++ b/cmake/vcpkg-triplets/binskim_nortti/x64-osx.cmake @@ -3,12 +3,16 @@ set(VCPKG_TARGET_ARCHITECTURE x64) set(VCPKG_CRT_LINKAGE dynamic) set(VCPKG_LIBRARY_LINKAGE static) -set(VCPKG_C_FLAGS "-Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -fstack-clash-protection -fcf-protection -DEMSCRIPTEN_HAS_UNBOUND_TYPE_NAMES=0") -set(VCPKG_CXX_FLAGS "-Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -fstack-clash-protection -fcf-protection -DEMSCRIPTEN_HAS_UNBOUND_TYPE_NAMES=0 -fno-rtti") +set(VCPKG_C_FLAGS "-g -DEMSCRIPTEN_HAS_UNBOUND_TYPE_NAMES=0") +set(VCPKG_CXX_FLAGS "-g -DEMSCRIPTEN_HAS_UNBOUND_TYPE_NAMES=0 -fno-rtti") +set(VCPKG_C_FLAGS_RELEASE "-DNDEBUG -O3 -Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -fstack-clash-protection -fcf-protection") +set(VCPKG_CXX_FLAGS_RELEASE "-DNDEBUG -O3 -Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -fstack-clash-protection -fcf-protection") set(VCPKG_CMAKE_SYSTEM_NAME Darwin) set(VCPKG_OSX_ARCHITECTURES "x86_64") 
set(CMAKE_POSITION_INDEPENDENT_CODE ON) list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error -DBENCHMARK_ENABLE_WERROR=OFF) +set(VCPKG_LINKER_FLAGS "-g") +list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS -DCMAKE_CXX_STANDARD=20) if(PORT MATCHES "onnx") list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS "-DONNX_DISABLE_STATIC_REGISTRATION=ON" diff --git a/cmake/vcpkg-triplets/binskim_nortti/x64-windows-static-md.cmake b/cmake/vcpkg-triplets/binskim_nortti/x64-windows-static-md.cmake index 38b5cbdde2d65..6491d31ae469b 100644 --- a/cmake/vcpkg-triplets/binskim_nortti/x64-windows-static-md.cmake +++ b/cmake/vcpkg-triplets/binskim_nortti/x64-windows-static-md.cmake @@ -5,7 +5,7 @@ set(VCPKG_CRT_LINKAGE dynamic) set(VCPKG_LIBRARY_LINKAGE static) set(VCPKG_C_FLAGS "/MP /DWIN32 /D_WINDOWS /DWINAPI_FAMILY=100 /DWINVER=0x0A00 /D_WIN32_WINNT=0x0A00 /DNTDDI_VERSION=0x0A000000 /guard:cf /Qspectre") set(VCPKG_CXX_FLAGS "/MP /DWIN32 /D_WINDOWS /DWINAPI_FAMILY=100 /DWINVER=0x0A00 /D_WIN32_WINNT=0x0A00 /DNTDDI_VERSION=0x0A000000 /guard:cf /Qspectre /Zc:__cplusplus /GR- /we4541") -list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error) +list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error -DCMAKE_CXX_STANDARD=17) set(VCPKG_LINKER_FLAGS "/profile /DYNAMICBASE") if(PORT MATCHES "onnx") list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS diff --git a/cmake/vcpkg-triplets/binskim_nortti/x64-windows-static.cmake b/cmake/vcpkg-triplets/binskim_nortti/x64-windows-static.cmake index bea970b669f4f..011999df2ac99 100644 --- a/cmake/vcpkg-triplets/binskim_nortti/x64-windows-static.cmake +++ b/cmake/vcpkg-triplets/binskim_nortti/x64-windows-static.cmake @@ -5,7 +5,7 @@ set(VCPKG_CRT_LINKAGE static) set(VCPKG_LIBRARY_LINKAGE static) set(VCPKG_C_FLAGS "/MP /DWIN32 /D_WINDOWS /DWINAPI_FAMILY=100 /DWINVER=0x0A00 /D_WIN32_WINNT=0x0A00 /DNTDDI_VERSION=0x0A000000 /guard:cf /Qspectre") set(VCPKG_CXX_FLAGS "/MP /DWIN32 /D_WINDOWS /DWINAPI_FAMILY=100 /DWINVER=0x0A00 /D_WIN32_WINNT=0x0A00 /DNTDDI_VERSION=0x0A000000 /guard:cf /Qspectre /Zc:__cplusplus /GR- /we4541") -list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error) +list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error -DCMAKE_CXX_STANDARD=17) set(VCPKG_LINKER_FLAGS "/profile /DYNAMICBASE") if(PORT MATCHES "onnx") list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS diff --git a/cmake/vcpkg-triplets/binskim_nortti/x86-windows-static-md.cmake b/cmake/vcpkg-triplets/binskim_nortti/x86-windows-static-md.cmake index e75d0c645c6a1..bf843c3e950e2 100644 --- a/cmake/vcpkg-triplets/binskim_nortti/x86-windows-static-md.cmake +++ b/cmake/vcpkg-triplets/binskim_nortti/x86-windows-static-md.cmake @@ -5,7 +5,7 @@ set(VCPKG_CRT_LINKAGE dynamic) set(VCPKG_LIBRARY_LINKAGE static) set(VCPKG_C_FLAGS "/MP /DWIN32 /D_WINDOWS /DWINAPI_FAMILY=100 /DWINVER=0x0A00 /D_WIN32_WINNT=0x0A00 /DNTDDI_VERSION=0x0A000000 /guard:cf /Qspectre") set(VCPKG_CXX_FLAGS "/MP /DWIN32 /D_WINDOWS /DWINAPI_FAMILY=100 /DWINVER=0x0A00 /D_WIN32_WINNT=0x0A00 /DNTDDI_VERSION=0x0A000000 /guard:cf /Qspectre /Zc:__cplusplus /GR- /we4541") -list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error) +list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error -DCMAKE_CXX_STANDARD=17) set(VCPKG_LINKER_FLAGS "/profile /DYNAMICBASE") if(PORT MATCHES "onnx") list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS diff --git a/cmake/vcpkg-triplets/binskim_nortti/x86-windows-static.cmake b/cmake/vcpkg-triplets/binskim_nortti/x86-windows-static.cmake index 
6de6f80d9705c..21e0858066ab8 100644 --- a/cmake/vcpkg-triplets/binskim_nortti/x86-windows-static.cmake +++ b/cmake/vcpkg-triplets/binskim_nortti/x86-windows-static.cmake @@ -5,7 +5,7 @@ set(VCPKG_CRT_LINKAGE static) set(VCPKG_LIBRARY_LINKAGE static) set(VCPKG_C_FLAGS "/MP /DWIN32 /D_WINDOWS /DWINAPI_FAMILY=100 /DWINVER=0x0A00 /D_WIN32_WINNT=0x0A00 /DNTDDI_VERSION=0x0A000000 /guard:cf /Qspectre") set(VCPKG_CXX_FLAGS "/MP /DWIN32 /D_WINDOWS /DWINAPI_FAMILY=100 /DWINVER=0x0A00 /D_WIN32_WINNT=0x0A00 /DNTDDI_VERSION=0x0A000000 /guard:cf /Qspectre /Zc:__cplusplus /GR- /we4541") -list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error) +list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error -DCMAKE_CXX_STANDARD=17) set(VCPKG_LINKER_FLAGS "/profile /DYNAMICBASE") if(PORT MATCHES "onnx") list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS diff --git a/cmake/vcpkg-triplets/default/arm64-linux.cmake b/cmake/vcpkg-triplets/default/arm64-linux.cmake index 581367931ba5e..120865a5b0b57 100644 --- a/cmake/vcpkg-triplets/default/arm64-linux.cmake +++ b/cmake/vcpkg-triplets/default/arm64-linux.cmake @@ -3,11 +3,15 @@ set(VCPKG_TARGET_ARCHITECTURE arm64) set(VCPKG_CRT_LINKAGE dynamic) set(VCPKG_LIBRARY_LINKAGE static) -set(VCPKG_C_FLAGS "") -set(VCPKG_CXX_FLAGS "") +set(VCPKG_C_FLAGS "-g") +set(VCPKG_CXX_FLAGS "-g") +set(VCPKG_C_FLAGS_RELEASE "-DNDEBUG -O3") +set(VCPKG_CXX_FLAGS_RELEASE "-DNDEBUG -O3") set(VCPKG_CMAKE_SYSTEM_NAME Linux) set(CMAKE_POSITION_INDEPENDENT_CODE ON) list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error -DBENCHMARK_ENABLE_WERROR=OFF) +set(VCPKG_LINKER_FLAGS "-g") +list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS -DCMAKE_CXX_STANDARD=17) if(PORT MATCHES "onnx") list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS "-DONNX_DISABLE_STATIC_REGISTRATION=ON" diff --git a/cmake/vcpkg-triplets/default/arm64-osx.cmake b/cmake/vcpkg-triplets/default/arm64-osx.cmake index 4d74306ba4e6a..02e8a3430475f 100644 --- a/cmake/vcpkg-triplets/default/arm64-osx.cmake +++ b/cmake/vcpkg-triplets/default/arm64-osx.cmake @@ -3,12 +3,16 @@ set(VCPKG_TARGET_ARCHITECTURE arm64) set(VCPKG_CRT_LINKAGE dynamic) set(VCPKG_LIBRARY_LINKAGE static) -set(VCPKG_C_FLAGS "") -set(VCPKG_CXX_FLAGS "") +set(VCPKG_C_FLAGS "-g") +set(VCPKG_CXX_FLAGS "-g") +set(VCPKG_C_FLAGS_RELEASE "-DNDEBUG -O3") +set(VCPKG_CXX_FLAGS_RELEASE "-DNDEBUG -O3") set(VCPKG_CMAKE_SYSTEM_NAME Darwin) set(VCPKG_OSX_ARCHITECTURES "arm64") set(CMAKE_POSITION_INDEPENDENT_CODE ON) list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error -DBENCHMARK_ENABLE_WERROR=OFF) +set(VCPKG_LINKER_FLAGS "-g") +list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS -DCMAKE_CXX_STANDARD=20) if(PORT MATCHES "onnx") list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS "-DONNX_DISABLE_STATIC_REGISTRATION=ON" diff --git a/cmake/vcpkg-triplets/default/arm64-windows-static-md.cmake b/cmake/vcpkg-triplets/default/arm64-windows-static-md.cmake index 135dc6ed6f3b5..6d5cf67665f4f 100644 --- a/cmake/vcpkg-triplets/default/arm64-windows-static-md.cmake +++ b/cmake/vcpkg-triplets/default/arm64-windows-static-md.cmake @@ -5,7 +5,7 @@ set(VCPKG_CRT_LINKAGE dynamic) set(VCPKG_LIBRARY_LINKAGE static) set(VCPKG_C_FLAGS "/MP /DWIN32 /D_WINDOWS /DWINAPI_FAMILY=100 /DWINVER=0x0A00 /D_WIN32_WINNT=0x0A00 /DNTDDI_VERSION=0x0A000000") set(VCPKG_CXX_FLAGS "/MP /DWIN32 /D_WINDOWS /DWINAPI_FAMILY=100 /DWINVER=0x0A00 /D_WIN32_WINNT=0x0A00 /DNTDDI_VERSION=0x0A000000 /Zc:__cplusplus") -list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error) +list(APPEND 
VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error -DCMAKE_CXX_STANDARD=17) if(PORT MATCHES "onnx") list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS "-DONNX_DISABLE_STATIC_REGISTRATION=ON" diff --git a/cmake/vcpkg-triplets/default/arm64-windows-static.cmake b/cmake/vcpkg-triplets/default/arm64-windows-static.cmake index 56e1aebfe620b..19ca6f16cd890 100644 --- a/cmake/vcpkg-triplets/default/arm64-windows-static.cmake +++ b/cmake/vcpkg-triplets/default/arm64-windows-static.cmake @@ -5,7 +5,7 @@ set(VCPKG_CRT_LINKAGE static) set(VCPKG_LIBRARY_LINKAGE static) set(VCPKG_C_FLAGS "/MP /DWIN32 /D_WINDOWS /DWINAPI_FAMILY=100 /DWINVER=0x0A00 /D_WIN32_WINNT=0x0A00 /DNTDDI_VERSION=0x0A000000") set(VCPKG_CXX_FLAGS "/MP /DWIN32 /D_WINDOWS /DWINAPI_FAMILY=100 /DWINVER=0x0A00 /D_WIN32_WINNT=0x0A00 /DNTDDI_VERSION=0x0A000000 /Zc:__cplusplus") -list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error) +list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error -DCMAKE_CXX_STANDARD=17) if(PORT MATCHES "onnx") list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS "-DONNX_DISABLE_STATIC_REGISTRATION=ON" diff --git a/cmake/vcpkg-triplets/default/arm64ec-windows-static-md.cmake b/cmake/vcpkg-triplets/default/arm64ec-windows-static-md.cmake index 9256f07f5451d..d7982158f390c 100644 --- a/cmake/vcpkg-triplets/default/arm64ec-windows-static-md.cmake +++ b/cmake/vcpkg-triplets/default/arm64ec-windows-static-md.cmake @@ -5,7 +5,7 @@ set(VCPKG_CRT_LINKAGE dynamic) set(VCPKG_LIBRARY_LINKAGE static) set(VCPKG_C_FLAGS "/MP /DWIN32 /D_WINDOWS /DWINAPI_FAMILY=100 /DWINVER=0x0A00 /D_WIN32_WINNT=0x0A00 /DNTDDI_VERSION=0x0A000000") set(VCPKG_CXX_FLAGS "/MP /DWIN32 /D_WINDOWS /DWINAPI_FAMILY=100 /DWINVER=0x0A00 /D_WIN32_WINNT=0x0A00 /DNTDDI_VERSION=0x0A000000 /Zc:__cplusplus") -list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error) +list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error -DCMAKE_CXX_STANDARD=17) if(PORT MATCHES "onnx") list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS "-DONNX_DISABLE_STATIC_REGISTRATION=ON" diff --git a/cmake/vcpkg-triplets/default/arm64ec-windows-static.cmake b/cmake/vcpkg-triplets/default/arm64ec-windows-static.cmake index bbdfed06fb2bc..fb14ad71c1d91 100644 --- a/cmake/vcpkg-triplets/default/arm64ec-windows-static.cmake +++ b/cmake/vcpkg-triplets/default/arm64ec-windows-static.cmake @@ -5,7 +5,7 @@ set(VCPKG_CRT_LINKAGE static) set(VCPKG_LIBRARY_LINKAGE static) set(VCPKG_C_FLAGS "/MP /DWIN32 /D_WINDOWS /DWINAPI_FAMILY=100 /DWINVER=0x0A00 /D_WIN32_WINNT=0x0A00 /DNTDDI_VERSION=0x0A000000") set(VCPKG_CXX_FLAGS "/MP /DWIN32 /D_WINDOWS /DWINAPI_FAMILY=100 /DWINVER=0x0A00 /D_WIN32_WINNT=0x0A00 /DNTDDI_VERSION=0x0A000000 /Zc:__cplusplus") -list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error) +list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error -DCMAKE_CXX_STANDARD=17) if(PORT MATCHES "onnx") list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS "-DONNX_DISABLE_STATIC_REGISTRATION=ON" diff --git a/cmake/vcpkg-triplets/default/universal2-osx.cmake b/cmake/vcpkg-triplets/default/universal2-osx.cmake index 64b19451dd64d..57386c423cf99 100644 --- a/cmake/vcpkg-triplets/default/universal2-osx.cmake +++ b/cmake/vcpkg-triplets/default/universal2-osx.cmake @@ -3,12 +3,16 @@ set(VCPKG_TARGET_ARCHITECTURE x64) set(VCPKG_CRT_LINKAGE dynamic) set(VCPKG_LIBRARY_LINKAGE static) -set(VCPKG_C_FLAGS "") -set(VCPKG_CXX_FLAGS "") +set(VCPKG_C_FLAGS "-g") +set(VCPKG_CXX_FLAGS "-g") +set(VCPKG_C_FLAGS_RELEASE "-DNDEBUG -O3") 
+set(VCPKG_CXX_FLAGS_RELEASE "-DNDEBUG -O3") set(VCPKG_CMAKE_SYSTEM_NAME Darwin) set(VCPKG_OSX_ARCHITECTURES "x86_64;arm64") set(CMAKE_POSITION_INDEPENDENT_CODE ON) list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error -DBENCHMARK_ENABLE_WERROR=OFF) +set(VCPKG_LINKER_FLAGS "-g") +list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS -DCMAKE_CXX_STANDARD=20) if(PORT MATCHES "onnx") list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS "-DONNX_DISABLE_STATIC_REGISTRATION=ON" diff --git a/cmake/vcpkg-triplets/default/x64-linux.cmake b/cmake/vcpkg-triplets/default/x64-linux.cmake index 57114dd5fcb76..30c7b1b786302 100644 --- a/cmake/vcpkg-triplets/default/x64-linux.cmake +++ b/cmake/vcpkg-triplets/default/x64-linux.cmake @@ -3,11 +3,15 @@ set(VCPKG_TARGET_ARCHITECTURE x64) set(VCPKG_CRT_LINKAGE dynamic) set(VCPKG_LIBRARY_LINKAGE static) -set(VCPKG_C_FLAGS "") -set(VCPKG_CXX_FLAGS "") +set(VCPKG_C_FLAGS "-g") +set(VCPKG_CXX_FLAGS "-g") +set(VCPKG_C_FLAGS_RELEASE "-DNDEBUG -O3") +set(VCPKG_CXX_FLAGS_RELEASE "-DNDEBUG -O3") set(VCPKG_CMAKE_SYSTEM_NAME Linux) set(CMAKE_POSITION_INDEPENDENT_CODE ON) list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error -DBENCHMARK_ENABLE_WERROR=OFF) +set(VCPKG_LINKER_FLAGS "-g") +list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS -DCMAKE_CXX_STANDARD=17) if(PORT MATCHES "onnx") list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS "-DONNX_DISABLE_STATIC_REGISTRATION=ON" diff --git a/cmake/vcpkg-triplets/default/x64-osx.cmake b/cmake/vcpkg-triplets/default/x64-osx.cmake index dd50e622677b7..7af622e1354b9 100644 --- a/cmake/vcpkg-triplets/default/x64-osx.cmake +++ b/cmake/vcpkg-triplets/default/x64-osx.cmake @@ -3,12 +3,16 @@ set(VCPKG_TARGET_ARCHITECTURE x64) set(VCPKG_CRT_LINKAGE dynamic) set(VCPKG_LIBRARY_LINKAGE static) -set(VCPKG_C_FLAGS "") -set(VCPKG_CXX_FLAGS "") +set(VCPKG_C_FLAGS "-g") +set(VCPKG_CXX_FLAGS "-g") +set(VCPKG_C_FLAGS_RELEASE "-DNDEBUG -O3") +set(VCPKG_CXX_FLAGS_RELEASE "-DNDEBUG -O3") set(VCPKG_CMAKE_SYSTEM_NAME Darwin) set(VCPKG_OSX_ARCHITECTURES "x86_64") set(CMAKE_POSITION_INDEPENDENT_CODE ON) list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error -DBENCHMARK_ENABLE_WERROR=OFF) +set(VCPKG_LINKER_FLAGS "-g") +list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS -DCMAKE_CXX_STANDARD=20) if(PORT MATCHES "onnx") list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS "-DONNX_DISABLE_STATIC_REGISTRATION=ON" diff --git a/cmake/vcpkg-triplets/default/x64-windows-static-md.cmake b/cmake/vcpkg-triplets/default/x64-windows-static-md.cmake index 5339a033715bb..bec5f2724da13 100644 --- a/cmake/vcpkg-triplets/default/x64-windows-static-md.cmake +++ b/cmake/vcpkg-triplets/default/x64-windows-static-md.cmake @@ -5,7 +5,7 @@ set(VCPKG_CRT_LINKAGE dynamic) set(VCPKG_LIBRARY_LINKAGE static) set(VCPKG_C_FLAGS "/MP /DWIN32 /D_WINDOWS /DWINAPI_FAMILY=100 /DWINVER=0x0A00 /D_WIN32_WINNT=0x0A00 /DNTDDI_VERSION=0x0A000000") set(VCPKG_CXX_FLAGS "/MP /DWIN32 /D_WINDOWS /DWINAPI_FAMILY=100 /DWINVER=0x0A00 /D_WIN32_WINNT=0x0A00 /DNTDDI_VERSION=0x0A000000 /Zc:__cplusplus") -list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error) +list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error -DCMAKE_CXX_STANDARD=17) if(PORT MATCHES "onnx") list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS "-DONNX_DISABLE_STATIC_REGISTRATION=ON" diff --git a/cmake/vcpkg-triplets/default/x64-windows-static.cmake b/cmake/vcpkg-triplets/default/x64-windows-static.cmake index 579740efb6ab7..3f62418071583 100644 --- a/cmake/vcpkg-triplets/default/x64-windows-static.cmake +++ 
b/cmake/vcpkg-triplets/default/x64-windows-static.cmake @@ -5,7 +5,7 @@ set(VCPKG_CRT_LINKAGE static) set(VCPKG_LIBRARY_LINKAGE static) set(VCPKG_C_FLAGS "/MP /DWIN32 /D_WINDOWS /DWINAPI_FAMILY=100 /DWINVER=0x0A00 /D_WIN32_WINNT=0x0A00 /DNTDDI_VERSION=0x0A000000") set(VCPKG_CXX_FLAGS "/MP /DWIN32 /D_WINDOWS /DWINAPI_FAMILY=100 /DWINVER=0x0A00 /D_WIN32_WINNT=0x0A00 /DNTDDI_VERSION=0x0A000000 /Zc:__cplusplus") -list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error) +list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error -DCMAKE_CXX_STANDARD=17) if(PORT MATCHES "onnx") list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS "-DONNX_DISABLE_STATIC_REGISTRATION=ON" diff --git a/cmake/vcpkg-triplets/default/x86-windows-static-md.cmake b/cmake/vcpkg-triplets/default/x86-windows-static-md.cmake index 34223c67e8f44..d93d87b3289f3 100644 --- a/cmake/vcpkg-triplets/default/x86-windows-static-md.cmake +++ b/cmake/vcpkg-triplets/default/x86-windows-static-md.cmake @@ -5,7 +5,7 @@ set(VCPKG_CRT_LINKAGE dynamic) set(VCPKG_LIBRARY_LINKAGE static) set(VCPKG_C_FLAGS "/MP /DWIN32 /D_WINDOWS /DWINAPI_FAMILY=100 /DWINVER=0x0A00 /D_WIN32_WINNT=0x0A00 /DNTDDI_VERSION=0x0A000000") set(VCPKG_CXX_FLAGS "/MP /DWIN32 /D_WINDOWS /DWINAPI_FAMILY=100 /DWINVER=0x0A00 /D_WIN32_WINNT=0x0A00 /DNTDDI_VERSION=0x0A000000 /Zc:__cplusplus") -list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error) +list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error -DCMAKE_CXX_STANDARD=17) if(PORT MATCHES "onnx") list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS "-DONNX_DISABLE_STATIC_REGISTRATION=ON" diff --git a/cmake/vcpkg-triplets/default/x86-windows-static.cmake b/cmake/vcpkg-triplets/default/x86-windows-static.cmake index fc95d409f890e..727b35cd1f7cc 100644 --- a/cmake/vcpkg-triplets/default/x86-windows-static.cmake +++ b/cmake/vcpkg-triplets/default/x86-windows-static.cmake @@ -5,7 +5,7 @@ set(VCPKG_CRT_LINKAGE static) set(VCPKG_LIBRARY_LINKAGE static) set(VCPKG_C_FLAGS "/MP /DWIN32 /D_WINDOWS /DWINAPI_FAMILY=100 /DWINVER=0x0A00 /D_WIN32_WINNT=0x0A00 /DNTDDI_VERSION=0x0A000000") set(VCPKG_CXX_FLAGS "/MP /DWIN32 /D_WINDOWS /DWINAPI_FAMILY=100 /DWINVER=0x0A00 /D_WIN32_WINNT=0x0A00 /DNTDDI_VERSION=0x0A000000 /Zc:__cplusplus") -list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error) +list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error -DCMAKE_CXX_STANDARD=17) if(PORT MATCHES "onnx") list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS "-DONNX_DISABLE_STATIC_REGISTRATION=ON"
diff --git a/cmake/vcpkg-triplets/gen.py b/cmake/vcpkg-triplets/gen.py
index 615ca66fc8f64..bec1a87a0a20e 100644
--- a/cmake/vcpkg-triplets/gen.py
+++ b/cmake/vcpkg-triplets/gen.py
@@ -88,9 +88,11 @@ def add_copyright_header(f):
                 # Disable RTTI and turn usage of dynamic_cast and typeid into errors
                 cxxflags += ["/GR-", "/we4541"]
             # TODO: should it be a cmake list separated by semicolons?
-            f.write('set(VCPKG_C_FLAGS "{}")\n'.format(" ".join(cflags)))
-            f.write('set(VCPKG_CXX_FLAGS "{}")\n'.format(" ".join(cxxflags)))
-            f.write("list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error)\n")
+            if len(cflags) >= 1:
+                f.write('set(VCPKG_C_FLAGS "{}")\n'.format(" ".join(cflags)))
+            if len(cxxflags) >= 1:
+                f.write('set(VCPKG_CXX_FLAGS "{}")\n'.format(" ".join(cxxflags)))
+            f.write("list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error -DCMAKE_CXX_STANDARD=17)\n")
             if ldflags:
                 f.write('set(VCPKG_LINKER_FLAGS "{}")\n'.format(" ".join(ldflags)))
             add_port_configs(f)
@@ -135,27 +137,34 @@ def add_copyright_header(f):
             f.write(f"set(VCPKG_TARGET_ARCHITECTURE {target_abi})\n")
             f.write(f"set(VCPKG_CRT_LINKAGE {crt_linkage})\n")
             f.write("set(VCPKG_LIBRARY_LINKAGE static)\n")
+
+
             if enable_binskim and os_name == "linux":
                 ldflags = [
                     "-Wl,-Bsymbolic-functions",
                     "-Wl,-z,relro",
                     "-Wl,-z,now",
-                    "-Wl,-z,noexecstack",
+                    "-Wl,-z,noexecstack"
                 ]
             else:
                 ldflags = []
-            cflags = []
+            # Enable debug info for all build configs
+            cflags = ["-g"]
+            cflags_release = ["-DNDEBUG", "-O3"]
             if enable_binskim:
-                cflags += [
+                # A warning may be generated from include/features.h if the _FORTIFY_SOURCE flag was used in a debug build
+                cflags_release += [
                     "-Wp,-D_FORTIFY_SOURCE=2",
                     "-Wp,-D_GLIBCXX_ASSERTIONS",
                     "-fstack-protector-strong",
                 ]
                 if target_abi == "x64":
-                    cflags += ["-fstack-clash-protection", "-fcf-protection"]
+                    cflags_release += ["-fstack-clash-protection", "-fcf-protection"]
             elif enable_asan:
                 cflags += ["-fsanitize=address"]
                 ldflags += ["-fsanitize=address"]
+            # Enable debug info for all build configs
+            ldflags.append('-g')
             # Avoid unboundTypeError for WebNN EP since unbound type names are illegal with RTTI disabled
             # in Embind API, relevant issue: https://github.com/emscripten-core/emscripten/issues/7001
             if not enable_rtti:
@@ -163,8 +172,13 @@ def add_copyright_header(f):
             cxxflags = cflags.copy()
             if not enable_rtti:
                 cxxflags.append("-fno-rtti")
-            f.write('set(VCPKG_C_FLAGS "{}")\n'.format(" ".join(cflags)))
-            f.write('set(VCPKG_CXX_FLAGS "{}")\n'.format(" ".join(cxxflags)))
+            if len(cflags) >= 1:
+                f.write('set(VCPKG_C_FLAGS "{}")\n'.format(" ".join(cflags)))
+            if len(cxxflags) >= 1:
+                f.write('set(VCPKG_CXX_FLAGS "{}")\n'.format(" ".join(cxxflags)))
+            if len(cflags_release) >= 1:
+                f.write('set(VCPKG_C_FLAGS_RELEASE "{}")\n'.format(" ".join(cflags_release)))
+                f.write('set(VCPKG_CXX_FLAGS_RELEASE "{}")\n'.format(" ".join(cflags_release)))
             if os_name == "linux":
                 f.write("set(VCPKG_CMAKE_SYSTEM_NAME Linux)\n")
             else:
@@ -184,4 +198,8 @@ def add_copyright_header(f):
 
             if ldflags:
                 f.write('set(VCPKG_LINKER_FLAGS "{}")\n'.format(" ".join(ldflags)))
+            if os_name == 'osx':
+                f.write('list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS -DCMAKE_CXX_STANDARD=20)\n')
+            else:
+                f.write('list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS -DCMAKE_CXX_STANDARD=17)\n')
             add_port_configs(f)
diff --git a/cmake/vcpkg-triplets/nortti/arm64-linux.cmake b/cmake/vcpkg-triplets/nortti/arm64-linux.cmake index 4bd974a112125..f9035fc299ce5 100644 --- a/cmake/vcpkg-triplets/nortti/arm64-linux.cmake +++ b/cmake/vcpkg-triplets/nortti/arm64-linux.cmake @@ -3,11 +3,15 @@ set(VCPKG_TARGET_ARCHITECTURE arm64) set(VCPKG_CRT_LINKAGE dynamic) set(VCPKG_LIBRARY_LINKAGE static) -set(VCPKG_C_FLAGS "-DEMSCRIPTEN_HAS_UNBOUND_TYPE_NAMES=0") -set(VCPKG_CXX_FLAGS "-DEMSCRIPTEN_HAS_UNBOUND_TYPE_NAMES=0 -fno-rtti") +set(VCPKG_C_FLAGS "-g -DEMSCRIPTEN_HAS_UNBOUND_TYPE_NAMES=0") +set(VCPKG_CXX_FLAGS "-g
-DEMSCRIPTEN_HAS_UNBOUND_TYPE_NAMES=0 -fno-rtti") +set(VCPKG_C_FLAGS_RELEASE "-DNDEBUG -O3") +set(VCPKG_CXX_FLAGS_RELEASE "-DNDEBUG -O3") set(VCPKG_CMAKE_SYSTEM_NAME Linux) set(CMAKE_POSITION_INDEPENDENT_CODE ON) list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error -DBENCHMARK_ENABLE_WERROR=OFF) +set(VCPKG_LINKER_FLAGS "-g") +list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS -DCMAKE_CXX_STANDARD=17) if(PORT MATCHES "onnx") list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS "-DONNX_DISABLE_STATIC_REGISTRATION=ON" diff --git a/cmake/vcpkg-triplets/nortti/arm64-osx.cmake b/cmake/vcpkg-triplets/nortti/arm64-osx.cmake index fd8be60b7158a..d8971e8122f9d 100644 --- a/cmake/vcpkg-triplets/nortti/arm64-osx.cmake +++ b/cmake/vcpkg-triplets/nortti/arm64-osx.cmake @@ -3,12 +3,16 @@ set(VCPKG_TARGET_ARCHITECTURE arm64) set(VCPKG_CRT_LINKAGE dynamic) set(VCPKG_LIBRARY_LINKAGE static) -set(VCPKG_C_FLAGS "-DEMSCRIPTEN_HAS_UNBOUND_TYPE_NAMES=0") -set(VCPKG_CXX_FLAGS "-DEMSCRIPTEN_HAS_UNBOUND_TYPE_NAMES=0 -fno-rtti") +set(VCPKG_C_FLAGS "-g -DEMSCRIPTEN_HAS_UNBOUND_TYPE_NAMES=0") +set(VCPKG_CXX_FLAGS "-g -DEMSCRIPTEN_HAS_UNBOUND_TYPE_NAMES=0 -fno-rtti") +set(VCPKG_C_FLAGS_RELEASE "-DNDEBUG -O3") +set(VCPKG_CXX_FLAGS_RELEASE "-DNDEBUG -O3") set(VCPKG_CMAKE_SYSTEM_NAME Darwin) set(VCPKG_OSX_ARCHITECTURES "arm64") set(CMAKE_POSITION_INDEPENDENT_CODE ON) list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error -DBENCHMARK_ENABLE_WERROR=OFF) +set(VCPKG_LINKER_FLAGS "-g") +list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS -DCMAKE_CXX_STANDARD=20) if(PORT MATCHES "onnx") list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS "-DONNX_DISABLE_STATIC_REGISTRATION=ON" diff --git a/cmake/vcpkg-triplets/nortti/arm64-windows-static-md.cmake b/cmake/vcpkg-triplets/nortti/arm64-windows-static-md.cmake index 45b24baa2c94b..9d3c86ce644d0 100644 --- a/cmake/vcpkg-triplets/nortti/arm64-windows-static-md.cmake +++ b/cmake/vcpkg-triplets/nortti/arm64-windows-static-md.cmake @@ -5,7 +5,7 @@ set(VCPKG_CRT_LINKAGE dynamic) set(VCPKG_LIBRARY_LINKAGE static) set(VCPKG_C_FLAGS "/MP /DWIN32 /D_WINDOWS /DWINAPI_FAMILY=100 /DWINVER=0x0A00 /D_WIN32_WINNT=0x0A00 /DNTDDI_VERSION=0x0A000000") set(VCPKG_CXX_FLAGS "/MP /DWIN32 /D_WINDOWS /DWINAPI_FAMILY=100 /DWINVER=0x0A00 /D_WIN32_WINNT=0x0A00 /DNTDDI_VERSION=0x0A000000 /Zc:__cplusplus /GR- /we4541") -list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error) +list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error -DCMAKE_CXX_STANDARD=17) if(PORT MATCHES "onnx") list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS "-DONNX_DISABLE_STATIC_REGISTRATION=ON" diff --git a/cmake/vcpkg-triplets/nortti/arm64-windows-static.cmake b/cmake/vcpkg-triplets/nortti/arm64-windows-static.cmake index 947fe9b61bd6c..238f7405ec492 100644 --- a/cmake/vcpkg-triplets/nortti/arm64-windows-static.cmake +++ b/cmake/vcpkg-triplets/nortti/arm64-windows-static.cmake @@ -5,7 +5,7 @@ set(VCPKG_CRT_LINKAGE static) set(VCPKG_LIBRARY_LINKAGE static) set(VCPKG_C_FLAGS "/MP /DWIN32 /D_WINDOWS /DWINAPI_FAMILY=100 /DWINVER=0x0A00 /D_WIN32_WINNT=0x0A00 /DNTDDI_VERSION=0x0A000000") set(VCPKG_CXX_FLAGS "/MP /DWIN32 /D_WINDOWS /DWINAPI_FAMILY=100 /DWINVER=0x0A00 /D_WIN32_WINNT=0x0A00 /DNTDDI_VERSION=0x0A000000 /Zc:__cplusplus /GR- /we4541") -list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error) +list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error -DCMAKE_CXX_STANDARD=17) if(PORT MATCHES "onnx") list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS "-DONNX_DISABLE_STATIC_REGISTRATION=ON" diff 
--git a/cmake/vcpkg-triplets/nortti/arm64ec-windows-static-md.cmake b/cmake/vcpkg-triplets/nortti/arm64ec-windows-static-md.cmake index ea5741fa42fbe..da314824ca7cc 100644 --- a/cmake/vcpkg-triplets/nortti/arm64ec-windows-static-md.cmake +++ b/cmake/vcpkg-triplets/nortti/arm64ec-windows-static-md.cmake @@ -5,7 +5,7 @@ set(VCPKG_CRT_LINKAGE dynamic) set(VCPKG_LIBRARY_LINKAGE static) set(VCPKG_C_FLAGS "/MP /DWIN32 /D_WINDOWS /DWINAPI_FAMILY=100 /DWINVER=0x0A00 /D_WIN32_WINNT=0x0A00 /DNTDDI_VERSION=0x0A000000") set(VCPKG_CXX_FLAGS "/MP /DWIN32 /D_WINDOWS /DWINAPI_FAMILY=100 /DWINVER=0x0A00 /D_WIN32_WINNT=0x0A00 /DNTDDI_VERSION=0x0A000000 /Zc:__cplusplus /GR- /we4541") -list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error) +list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error -DCMAKE_CXX_STANDARD=17) if(PORT MATCHES "onnx") list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS "-DONNX_DISABLE_STATIC_REGISTRATION=ON" diff --git a/cmake/vcpkg-triplets/nortti/arm64ec-windows-static.cmake b/cmake/vcpkg-triplets/nortti/arm64ec-windows-static.cmake index 2b354ba511303..0c7fb60401f1d 100644 --- a/cmake/vcpkg-triplets/nortti/arm64ec-windows-static.cmake +++ b/cmake/vcpkg-triplets/nortti/arm64ec-windows-static.cmake @@ -5,7 +5,7 @@ set(VCPKG_CRT_LINKAGE static) set(VCPKG_LIBRARY_LINKAGE static) set(VCPKG_C_FLAGS "/MP /DWIN32 /D_WINDOWS /DWINAPI_FAMILY=100 /DWINVER=0x0A00 /D_WIN32_WINNT=0x0A00 /DNTDDI_VERSION=0x0A000000") set(VCPKG_CXX_FLAGS "/MP /DWIN32 /D_WINDOWS /DWINAPI_FAMILY=100 /DWINVER=0x0A00 /D_WIN32_WINNT=0x0A00 /DNTDDI_VERSION=0x0A000000 /Zc:__cplusplus /GR- /we4541") -list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error) +list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error -DCMAKE_CXX_STANDARD=17) if(PORT MATCHES "onnx") list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS "-DONNX_DISABLE_STATIC_REGISTRATION=ON" diff --git a/cmake/vcpkg-triplets/nortti/universal2-osx.cmake b/cmake/vcpkg-triplets/nortti/universal2-osx.cmake index 7111bb87c910b..febc002c0488e 100644 --- a/cmake/vcpkg-triplets/nortti/universal2-osx.cmake +++ b/cmake/vcpkg-triplets/nortti/universal2-osx.cmake @@ -3,12 +3,16 @@ set(VCPKG_TARGET_ARCHITECTURE x64) set(VCPKG_CRT_LINKAGE dynamic) set(VCPKG_LIBRARY_LINKAGE static) -set(VCPKG_C_FLAGS "-DEMSCRIPTEN_HAS_UNBOUND_TYPE_NAMES=0") -set(VCPKG_CXX_FLAGS "-DEMSCRIPTEN_HAS_UNBOUND_TYPE_NAMES=0 -fno-rtti") +set(VCPKG_C_FLAGS "-g -DEMSCRIPTEN_HAS_UNBOUND_TYPE_NAMES=0") +set(VCPKG_CXX_FLAGS "-g -DEMSCRIPTEN_HAS_UNBOUND_TYPE_NAMES=0 -fno-rtti") +set(VCPKG_C_FLAGS_RELEASE "-DNDEBUG -O3") +set(VCPKG_CXX_FLAGS_RELEASE "-DNDEBUG -O3") set(VCPKG_CMAKE_SYSTEM_NAME Darwin) set(VCPKG_OSX_ARCHITECTURES "x86_64;arm64") set(CMAKE_POSITION_INDEPENDENT_CODE ON) list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error -DBENCHMARK_ENABLE_WERROR=OFF) +set(VCPKG_LINKER_FLAGS "-g") +list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS -DCMAKE_CXX_STANDARD=20) if(PORT MATCHES "onnx") list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS "-DONNX_DISABLE_STATIC_REGISTRATION=ON" diff --git a/cmake/vcpkg-triplets/nortti/x64-linux.cmake b/cmake/vcpkg-triplets/nortti/x64-linux.cmake index 34fcc968e6c0e..c1dac19d33f2c 100644 --- a/cmake/vcpkg-triplets/nortti/x64-linux.cmake +++ b/cmake/vcpkg-triplets/nortti/x64-linux.cmake @@ -3,11 +3,15 @@ set(VCPKG_TARGET_ARCHITECTURE x64) set(VCPKG_CRT_LINKAGE dynamic) set(VCPKG_LIBRARY_LINKAGE static) -set(VCPKG_C_FLAGS "-DEMSCRIPTEN_HAS_UNBOUND_TYPE_NAMES=0") -set(VCPKG_CXX_FLAGS "-DEMSCRIPTEN_HAS_UNBOUND_TYPE_NAMES=0 
-fno-rtti") +set(VCPKG_C_FLAGS "-g -DEMSCRIPTEN_HAS_UNBOUND_TYPE_NAMES=0") +set(VCPKG_CXX_FLAGS "-g -DEMSCRIPTEN_HAS_UNBOUND_TYPE_NAMES=0 -fno-rtti") +set(VCPKG_C_FLAGS_RELEASE "-DNDEBUG -O3") +set(VCPKG_CXX_FLAGS_RELEASE "-DNDEBUG -O3") set(VCPKG_CMAKE_SYSTEM_NAME Linux) set(CMAKE_POSITION_INDEPENDENT_CODE ON) list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error -DBENCHMARK_ENABLE_WERROR=OFF) +set(VCPKG_LINKER_FLAGS "-g") +list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS -DCMAKE_CXX_STANDARD=17) if(PORT MATCHES "onnx") list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS "-DONNX_DISABLE_STATIC_REGISTRATION=ON" diff --git a/cmake/vcpkg-triplets/nortti/x64-osx.cmake b/cmake/vcpkg-triplets/nortti/x64-osx.cmake index 0a7fcc08e0c69..242d34a358170 100644 --- a/cmake/vcpkg-triplets/nortti/x64-osx.cmake +++ b/cmake/vcpkg-triplets/nortti/x64-osx.cmake @@ -3,12 +3,16 @@ set(VCPKG_TARGET_ARCHITECTURE x64) set(VCPKG_CRT_LINKAGE dynamic) set(VCPKG_LIBRARY_LINKAGE static) -set(VCPKG_C_FLAGS "-DEMSCRIPTEN_HAS_UNBOUND_TYPE_NAMES=0") -set(VCPKG_CXX_FLAGS "-DEMSCRIPTEN_HAS_UNBOUND_TYPE_NAMES=0 -fno-rtti") +set(VCPKG_C_FLAGS "-g -DEMSCRIPTEN_HAS_UNBOUND_TYPE_NAMES=0") +set(VCPKG_CXX_FLAGS "-g -DEMSCRIPTEN_HAS_UNBOUND_TYPE_NAMES=0 -fno-rtti") +set(VCPKG_C_FLAGS_RELEASE "-DNDEBUG -O3") +set(VCPKG_CXX_FLAGS_RELEASE "-DNDEBUG -O3") set(VCPKG_CMAKE_SYSTEM_NAME Darwin) set(VCPKG_OSX_ARCHITECTURES "x86_64") set(CMAKE_POSITION_INDEPENDENT_CODE ON) list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error -DBENCHMARK_ENABLE_WERROR=OFF) +set(VCPKG_LINKER_FLAGS "-g") +list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS -DCMAKE_CXX_STANDARD=20) if(PORT MATCHES "onnx") list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS "-DONNX_DISABLE_STATIC_REGISTRATION=ON" diff --git a/cmake/vcpkg-triplets/nortti/x64-windows-static-md.cmake b/cmake/vcpkg-triplets/nortti/x64-windows-static-md.cmake index 5c62c4263f8eb..a8d2441583d0f 100644 --- a/cmake/vcpkg-triplets/nortti/x64-windows-static-md.cmake +++ b/cmake/vcpkg-triplets/nortti/x64-windows-static-md.cmake @@ -5,7 +5,7 @@ set(VCPKG_CRT_LINKAGE dynamic) set(VCPKG_LIBRARY_LINKAGE static) set(VCPKG_C_FLAGS "/MP /DWIN32 /D_WINDOWS /DWINAPI_FAMILY=100 /DWINVER=0x0A00 /D_WIN32_WINNT=0x0A00 /DNTDDI_VERSION=0x0A000000") set(VCPKG_CXX_FLAGS "/MP /DWIN32 /D_WINDOWS /DWINAPI_FAMILY=100 /DWINVER=0x0A00 /D_WIN32_WINNT=0x0A00 /DNTDDI_VERSION=0x0A000000 /Zc:__cplusplus /GR- /we4541") -list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error) +list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error -DCMAKE_CXX_STANDARD=17) if(PORT MATCHES "onnx") list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS "-DONNX_DISABLE_STATIC_REGISTRATION=ON" diff --git a/cmake/vcpkg-triplets/nortti/x64-windows-static.cmake b/cmake/vcpkg-triplets/nortti/x64-windows-static.cmake index deceefcd95910..688ed230fd17c 100644 --- a/cmake/vcpkg-triplets/nortti/x64-windows-static.cmake +++ b/cmake/vcpkg-triplets/nortti/x64-windows-static.cmake @@ -5,7 +5,7 @@ set(VCPKG_CRT_LINKAGE static) set(VCPKG_LIBRARY_LINKAGE static) set(VCPKG_C_FLAGS "/MP /DWIN32 /D_WINDOWS /DWINAPI_FAMILY=100 /DWINVER=0x0A00 /D_WIN32_WINNT=0x0A00 /DNTDDI_VERSION=0x0A000000") set(VCPKG_CXX_FLAGS "/MP /DWIN32 /D_WINDOWS /DWINAPI_FAMILY=100 /DWINVER=0x0A00 /D_WIN32_WINNT=0x0A00 /DNTDDI_VERSION=0x0A000000 /Zc:__cplusplus /GR- /we4541") -list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error) +list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error -DCMAKE_CXX_STANDARD=17) if(PORT MATCHES "onnx") list(APPEND 
VCPKG_CMAKE_CONFIGURE_OPTIONS "-DONNX_DISABLE_STATIC_REGISTRATION=ON" diff --git a/cmake/vcpkg-triplets/nortti/x86-windows-static-md.cmake b/cmake/vcpkg-triplets/nortti/x86-windows-static-md.cmake index cb1b3cd887932..1d3de9c1420c4 100644 --- a/cmake/vcpkg-triplets/nortti/x86-windows-static-md.cmake +++ b/cmake/vcpkg-triplets/nortti/x86-windows-static-md.cmake @@ -5,7 +5,7 @@ set(VCPKG_CRT_LINKAGE dynamic) set(VCPKG_LIBRARY_LINKAGE static) set(VCPKG_C_FLAGS "/MP /DWIN32 /D_WINDOWS /DWINAPI_FAMILY=100 /DWINVER=0x0A00 /D_WIN32_WINNT=0x0A00 /DNTDDI_VERSION=0x0A000000") set(VCPKG_CXX_FLAGS "/MP /DWIN32 /D_WINDOWS /DWINAPI_FAMILY=100 /DWINVER=0x0A00 /D_WIN32_WINNT=0x0A00 /DNTDDI_VERSION=0x0A000000 /Zc:__cplusplus /GR- /we4541") -list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error) +list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error -DCMAKE_CXX_STANDARD=17) if(PORT MATCHES "onnx") list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS "-DONNX_DISABLE_STATIC_REGISTRATION=ON" diff --git a/cmake/vcpkg-triplets/nortti/x86-windows-static.cmake b/cmake/vcpkg-triplets/nortti/x86-windows-static.cmake index 032021745099a..3a856c26797a4 100644 --- a/cmake/vcpkg-triplets/nortti/x86-windows-static.cmake +++ b/cmake/vcpkg-triplets/nortti/x86-windows-static.cmake @@ -5,7 +5,7 @@ set(VCPKG_CRT_LINKAGE static) set(VCPKG_LIBRARY_LINKAGE static) set(VCPKG_C_FLAGS "/MP /DWIN32 /D_WINDOWS /DWINAPI_FAMILY=100 /DWINVER=0x0A00 /D_WIN32_WINNT=0x0A00 /DNTDDI_VERSION=0x0A000000") set(VCPKG_CXX_FLAGS "/MP /DWIN32 /D_WINDOWS /DWINAPI_FAMILY=100 /DWINVER=0x0A00 /D_WIN32_WINNT=0x0A00 /DNTDDI_VERSION=0x0A000000 /Zc:__cplusplus /GR- /we4541") -list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error) +list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error -DCMAKE_CXX_STANDARD=17) if(PORT MATCHES "onnx") list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS "-DONNX_DISABLE_STATIC_REGISTRATION=ON" diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/Microsoft.ML.OnnxRuntime.csproj b/csharp/src/Microsoft.ML.OnnxRuntime/Microsoft.ML.OnnxRuntime.csproj index 63131d05c03d5..be0e8d2ee58a4 100644 --- a/csharp/src/Microsoft.ML.OnnxRuntime/Microsoft.ML.OnnxRuntime.csproj +++ b/csharp/src/Microsoft.ML.OnnxRuntime/Microsoft.ML.OnnxRuntime.csproj @@ -1,4 +1,4 @@ - + Microsoft.ML.OnnxRuntime @@ -127,6 +127,11 @@ $(OrtConstants);__ENABLE_TRAINING_APIS__ + + + true + + @@ -184,6 +189,10 @@ + + + + diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/OrtValue.shared.cs b/csharp/src/Microsoft.ML.OnnxRuntime/OrtValue.shared.cs index d38748c2f97cc..7a5c3aaa19eac 100644 --- a/csharp/src/Microsoft.ML.OnnxRuntime/OrtValue.shared.cs +++ b/csharp/src/Microsoft.ML.OnnxRuntime/OrtValue.shared.cs @@ -9,6 +9,14 @@ using System.Runtime.InteropServices; using System.Text; +#if NET8_0_OR_GREATER +using System.Diagnostics.CodeAnalysis; +using System.Reflection; +using System.Runtime.CompilerServices; +using SystemNumericsTensors = System.Numerics.Tensors; +using TensorPrimitives = System.Numerics.Tensors.TensorPrimitives; +#endif + namespace Microsoft.ML.OnnxRuntime { /// @@ -205,6 +213,33 @@ public ReadOnlySpan GetTensorDataAsSpan() where T : unmanaged return MemoryMarshal.Cast(byteSpan); } +#if NET8_0_OR_GREATER + /// + /// Returns a ReadOnlyTensorSpan over tensor native buffer that + /// provides a read-only view. + /// + /// Note, that the memory may be device allocated and, therefore, not accessible from the CPU. + /// To get memory descriptor use GetTensorMemoryInfo(). 
+ /// + /// OrtValue must contain a non-string tensor. + /// The span is valid as long as the OrtValue instance is alive (not disposed). + /// + /// + /// ReadOnlySpan + /// + [Experimental("SYSLIB5001")] + public SystemNumericsTensors.ReadOnlyTensorSpan GetTensorDataAsTensorSpan() where T : unmanaged + { + var byteSpan = GetTensorBufferRawData(typeof(T)); + + var typeSpan = MemoryMarshal.Cast(byteSpan); + var shape = GetTypeInfo().TensorTypeAndShapeInfo.Shape; + nint[] nArray = Array.ConvertAll(shape, new Converter(x => (nint)x)); + + return new SystemNumericsTensors.ReadOnlyTensorSpan(typeSpan, nArray, []); + } +#endif + /// /// Returns a Span over tensor native buffer. /// This enables you to safely and efficiently modify the underlying @@ -225,6 +260,32 @@ public Span GetTensorMutableDataAsSpan() where T : unmanaged return MemoryMarshal.Cast(byteSpan); } +#if NET8_0_OR_GREATER + /// + /// Returns a TensorSpan over tensor native buffer. + /// + /// Note, that the memory may be device allocated and, therefore, not accessible from the CPU. + /// To get memory descriptor use GetTensorMemoryInfo(). + /// + /// OrtValue must contain a non-string tensor. + /// The span is valid as long as the OrtValue instance is alive (not disposed). + /// + /// + /// ReadOnlySpan + /// + [Experimental("SYSLIB5001")] + public SystemNumericsTensors.TensorSpan GetTensorMutableDataAsTensorSpan() where T : unmanaged + { + var byteSpan = GetTensorBufferRawData(typeof(T)); + + var typeSpan = MemoryMarshal.Cast(byteSpan); + var shape = GetTypeInfo().TensorTypeAndShapeInfo.Shape; + nint[] nArray = Array.ConvertAll(shape, new Converter(x => (nint)x)); + + return new SystemNumericsTensors.TensorSpan(typeSpan, nArray, []); + } +#endif + /// /// Provides mutable raw native buffer access. /// @@ -234,6 +295,23 @@ public Span GetTensorMutableRawData() return GetTensorBufferRawData(typeof(byte)); } +#if NET8_0_OR_GREATER + /// + /// Provides mutable raw native buffer access. + /// + /// TensorSpan over the native buffer bytes + [Experimental("SYSLIB5001")] + public SystemNumericsTensors.TensorSpan GetTensorSpanMutableRawData() where T : unmanaged + { + var byteSpan = GetTensorBufferRawData(typeof(T)); + + var shape = GetTypeInfo().TensorTypeAndShapeInfo.Shape; + nint[] nArray = Array.ConvertAll(shape, new Converter(x => (nint)x)); + + return new SystemNumericsTensors.TensorSpan(byteSpan, nArray, []); + } +#endif + /// /// Fetch string tensor element buffer pointer at the specified index, /// convert/copy to UTF-16 char[] and return a ReadOnlyMemory{char} instance. @@ -605,6 +683,80 @@ public static OrtValue CreateTensorValueFromMemory(T[] data, long[] shape) wh return OrtValue.CreateTensorValueFromMemory(OrtMemoryInfo.DefaultInstance, new Memory(data), shape); } +#if NET8_0_OR_GREATER + /// + /// This is a factory method creates a native Onnxruntime OrtValue containing a tensor on top of the existing tensor managed memory. + /// The method will attempt to pin managed memory so no copying occurs when data is passed down + /// to native code. 
+ /// + /// Tensor object + /// discovered tensor element type + /// And instance of OrtValue constructed on top of the object + [Experimental("SYSLIB5001")] + public static OrtValue CreateTensorValueFromSystemNumericsTensorObject(SystemNumericsTensors.Tensor tensor) where T : unmanaged + { + if (!IsContiguousAndDense(tensor)) + { + var newTensor = SystemNumericsTensors.Tensor.Create(tensor.Lengths); + tensor.CopyTo(newTensor); + tensor = newTensor; + } + unsafe + { + var backingData = (T[])tensor.GetType().GetField("_values", BindingFlags.Instance | BindingFlags.NonPublic).GetValue(tensor); + GCHandle handle = GCHandle.Alloc(backingData, GCHandleType.Pinned); + var memHandle = new MemoryHandle(Unsafe.AsPointer(ref tensor.GetPinnableReference()), handle); + + try + { + IntPtr dataBufferPointer = IntPtr.Zero; + unsafe + { + dataBufferPointer = (IntPtr)memHandle.Pointer; + } + + var bufferLengthInBytes = tensor.FlattenedLength * sizeof(T); + long[] shape = Array.ConvertAll(tensor.Lengths.ToArray(), new Converter(x => (long)x)); + + var typeInfo = TensorBase.GetTypeInfo(typeof(T)) ?? + throw new OnnxRuntimeException(ErrorCode.InvalidArgument, $"Tensor of type: {typeof(T)} is not supported"); + + NativeApiStatus.VerifySuccess(NativeMethods.OrtCreateTensorWithDataAsOrtValue( + OrtMemoryInfo.DefaultInstance.Pointer, + dataBufferPointer, + (UIntPtr)(bufferLengthInBytes), + shape, + (UIntPtr)tensor.Rank, + typeInfo.ElementType, + out IntPtr nativeValue)); + + return new OrtValue(nativeValue, memHandle); + } + catch (Exception) + { + memHandle.Dispose(); + throw; + } + } + } + + [Experimental("SYSLIB5001")] + private static bool IsContiguousAndDense(SystemNumericsTensors.Tensor tensor) where T : unmanaged + { + // Right most dimension must be 1 for a dense tensor. + if (tensor.Strides[^1] != 1) + return false; + + // For other dimensions, the stride must be equal to the product of the dimensions to the right. + for (int i = tensor.Rank - 2; i >= 0; i--) + { + if (tensor.Strides[i] != TensorPrimitives.Product(tensor.Lengths.Slice(i + 1, tensor.Lengths.Length - i - 1))) + return false; + } + return true; + } +#endif + /// /// The factory API creates an OrtValue with memory allocated using the given allocator /// according to the specified shape and element type. 
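Taken together, the span accessors and the pinning factory above give a copy-free round trip between System.Numerics.Tensors and OrtValue. A minimal sketch of that round trip, not part of the diff: the model path "model.onnx" is hypothetical, the "data_0"/"softmaxout_1" names are borrowed from the squeezenet tests further down, and System.Numerics.Tensors is still a preview API gated behind SYSLIB5001.

```csharp
// Sketch only: assumes .NET 8+, the preview System.Numerics.Tensors package,
// and a model with one float input "data_0" and one output "softmaxout_1".
#pragma warning disable SYSLIB5001 // System.Numerics.Tensors is still in preview
using System;
using System.Collections.Generic;
using Microsoft.ML.OnnxRuntime;
using SystemNumericsTensors = System.Numerics.Tensors;

var input = SystemNumericsTensors.Tensor.Create(
    new float[1 * 3 * 224 * 224], new nint[] { 1, 3, 224, 224 });

using var session = new InferenceSession("model.onnx");
using var runOptions = new RunOptions();

// Dense tensors are pinned in place; non-dense ones are first copied into a
// packed tensor (see IsContiguousAndDense above).
using var inputValue = OrtValue.CreateTensorValueFromSystemNumericsTensorObject(input);

using var results = session.Run(runOptions,
    new List<string> { "data_0" }, new List<OrtValue> { inputValue },
    new List<string> { "softmaxout_1" });

foreach (var result in results)
{
    // Read-only view over the native output buffer; no copy is made.
    SystemNumericsTensors.ReadOnlyTensorSpan<float> output =
        result.GetTensorDataAsTensorSpan<float>();
    Console.WriteLine($"rank={output.Rank}, elements={output.FlattenedLength}");
}
#pragma warning restore SYSLIB5001
```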
The memory will be released when OrtValue diff --git a/csharp/test/Microsoft.ML.OnnxRuntime.Tests.NetCoreApp/InferenceTest.netcore.cs b/csharp/test/Microsoft.ML.OnnxRuntime.Tests.NetCoreApp/InferenceTest.netcore.cs index ff5fd2de54197..816511150a137 100644 --- a/csharp/test/Microsoft.ML.OnnxRuntime.Tests.NetCoreApp/InferenceTest.netcore.cs +++ b/csharp/test/Microsoft.ML.OnnxRuntime.Tests.NetCoreApp/InferenceTest.netcore.cs @@ -7,6 +7,10 @@ using System.Text.RegularExpressions; using Xunit; +#if NET8_0_OR_GREATER +using SystemNumericsTensors = System.Numerics.Tensors; +#endif + namespace Microsoft.ML.OnnxRuntime.Tests { /// @@ -67,6 +71,194 @@ public void CanCreateAndDisposeSessionWithModelPath() } } +#if NET8_0_OR_GREATER +#pragma warning disable SYSLIB5001 // System.Numerics.Tensors is only in preview so we can continue receiving API feedback + [Theory] + [InlineData(GraphOptimizationLevel.ORT_DISABLE_ALL, true)] + [InlineData(GraphOptimizationLevel.ORT_DISABLE_ALL, false)] + [InlineData(GraphOptimizationLevel.ORT_ENABLE_EXTENDED, true)] + [InlineData(GraphOptimizationLevel.ORT_ENABLE_EXTENDED, false)] + private void CanRunInferenceOnAModelDotnetTensors(GraphOptimizationLevel graphOptimizationLevel, bool enableParallelExecution) + { + var model = TestDataLoader.LoadModelFromEmbeddedResource("squeezenet.onnx"); + + using (var cleanUp = new DisposableListTest()) + { + // Set the graph optimization level for this session. + SessionOptions options = new SessionOptions(); + cleanUp.Add(options); + options.GraphOptimizationLevel = graphOptimizationLevel; + + var session = new InferenceSession(model, options); + cleanUp.Add(session); + + using var runOptions = new RunOptions(); + var inputMeta = session.InputMetadata; + var outputMeta = session.OutputMetadata; + + float[] expectedOutput = TestDataLoader.LoadTensorFromEmbeddedResource("bench.expected_out"); + long[] expectedDimensions = { 1, 1000, 1, 1 }; // hardcoded for now for the test data + ReadOnlySpan expectedOutputDimensions = expectedDimensions; + + float[] inputData = TestDataLoader.LoadTensorFromEmbeddedResource("bench.in"); // this is the data for only one input tensor for this model + + using var inputOrtValues = new DisposableListTest>(session.InputMetadata.Count); + + foreach (var name in inputMeta.Keys) + { + Assert.Equal(typeof(float), inputMeta[name].ElementType); + Assert.True(inputMeta[name].IsTensor); + var tensor = SystemNumericsTensors.Tensor.Create(inputData, inputMeta[name].Dimensions.Select(x => (nint)x).ToArray()); + inputOrtValues.Add(new DisposableTestPair(name, OrtValue.CreateTensorValueFromSystemNumericsTensorObject(tensor))); + + } + + runOptions.LogId = "CsharpTest"; + runOptions.Terminate = false; // TODO: Test terminate = true, it currently crashes + runOptions.LogSeverityLevel = OrtLoggingLevel.ORT_LOGGING_LEVEL_ERROR; + // Run inference with named inputs and outputs created within Run() + using (var results = session.Run(runOptions, inputOrtValues.Select(x => x.Key).ToList(), inputOrtValues.Select(x => x.Value).ToList(), new List(["softmaxout_1"]))) // results is an IDisposableReadOnlyCollection container + { + // validate the results + foreach (var r in results) + { + Assert.Single(results); + + ValidateRunResult(r, expectedOutput, expectedDimensions); + } + } + } + } + + [Fact] + public void InferenceSessionDisposedDotnetTensors() + { + var model = TestDataLoader.LoadModelFromEmbeddedResource("squeezenet.onnx"); + + // Enable profiling for this session; the profile file name uses the prefix set below.
+ using (SessionOptions options = new SessionOptions()) + { + options.ProfileOutputPathPrefix = "Ort_P_"; + options.EnableProfiling = true; + using (var session = new InferenceSession(model, options)) + { + var inputMeta = session.InputMetadata; + var container = new List(); + + float[] inputData = TestDataLoader.LoadTensorFromEmbeddedResource("bench.in"); // this is the data for only one input tensor for this model + + using (var runOptions = new RunOptions()) + using (var inputOrtValues = new DisposableListTest>(session.InputMetadata.Count)) + using (var outputOrtValues = new DisposableListTest>(session.OutputMetadata.Count)) + { + + foreach (var name in inputMeta.Keys) + { + Assert.Equal(typeof(float), inputMeta[name].ElementType); + Assert.True(inputMeta[name].IsTensor); + var tensor = SystemNumericsTensors.Tensor.Create(inputData, inputMeta[name].Dimensions.Select(x => (nint) x).ToArray()); + inputOrtValues.Add(new DisposableTestPair(name, OrtValue.CreateTensorValueFromSystemNumericsTensorObject(tensor))); + } + + // Run inference with named inputs and outputs created within Run() + using (var results = session.Run(runOptions, inputOrtValues.Select(x => x.Key).ToList(), inputOrtValues.Select(x => x.Value).ToList(), new List(["softmaxout_1"]))) // results is an IDisposableReadOnlyCollection container + { + // validate the results + foreach (var r in results) + { + Assert.Single(results); + + float[] expectedOutput = TestDataLoader.LoadTensorFromEmbeddedResource("bench.expected_out"); + long[] expectedDimensions = { 1, 1000, 1, 1 }; // hardcoded for now for the test data + ValidateRunResult(r, expectedOutput, expectedDimensions); + } + } + } + + string profile_file = session.EndProfiling(); + + // Profile file should have the output path prefix in it + Assert.Contains("Ort_P_", profile_file); + } + } + } + + [Fact] + private void ThrowWrongOutputNameDotnetTensors() + { + var tuple = OpenSessionSqueezeNet(); + var session = tuple.Item1; + var inputData = tuple.Item2; + var inputTensor = tuple.Item3; + + using (var runOptions = new RunOptions()) + using (var inputOrtValues = new DisposableListTest>(session.InputMetadata.Count)) + using (var outputOrtValues = new DisposableListTest>(session.OutputMetadata.Count)) + { + var tensor = SystemNumericsTensors.Tensor.Create(inputData, Array.ConvertAll(inputTensor.Dimensions.ToArray(), x => (nint)x)); + + inputOrtValues.Add(new DisposableTestPair("data_0", OrtValue.CreateTensorValueFromSystemNumericsTensorObject(tensor))); + outputOrtValues.Add(new DisposableTestPair("bad_output_name", OrtValue.CreateTensorValueFromSystemNumericsTensorObject(tensor))); + + var ex = Assert.Throws(() => session.Run(runOptions, ["data_0"], [inputOrtValues[0].Value], ["bad_output_name"], [outputOrtValues[0].Value])); + Assert.Contains("Output name: 'bad_output_name' is not in the metadata", ex.Message); + } + + session.Dispose(); + } + + [Fact] + private void ThrowWrongOutputDimensionDotnetTensors() + { + var tuple = OpenSessionSqueezeNet(); + var session = tuple.Item1; + var inputData = tuple.Item2; + var inputTensor = tuple.Item3; + var outputTensor = SystemNumericsTensors.Tensor.Create([1, 1001, 1, 1]); + + using (var runOptions = new RunOptions()) + using (var inputOrtValues = new DisposableListTest>(session.InputMetadata.Count)) + using (var outputOrtValues = new DisposableListTest>(session.OutputMetadata.Count)) + { + var tensor = SystemNumericsTensors.Tensor.Create(inputData, Array.ConvertAll(inputTensor.Dimensions.ToArray(), x => (nint)x)); + + 
inputOrtValues.Add(new DisposableTestPair("data_0", OrtValue.CreateTensorValueFromSystemNumericsTensorObject(tensor))); + outputOrtValues.Add(new DisposableTestPair("softmaxout_1", OrtValue.CreateTensorValueFromSystemNumericsTensorObject(outputTensor))); + + var ex = Assert.Throws(() => session.Run(runOptions, ["data_0"], [inputOrtValues[0].Value], ["softmaxout_1"], [outputOrtValues[0].Value])); + } + + session.Dispose(); + } + + [Fact] + private void ThrowInconsistentPinnedOutputsDotnetTensors() + { + var tuple = OpenSessionSqueezeNet(); + using var cleanUp = new DisposableListTest(); + cleanUp.Add(tuple.Item1); + var session = tuple.Item1; + var inputData = tuple.Item2; + var inputTensor = tuple.Item3; + var outputTensor = SystemNumericsTensors.Tensor.Create([1, 1001, 1, 1], [4]); + + using (var runOptions = new RunOptions()) + using (var inputOrtValues = new DisposableListTest>(session.InputMetadata.Count)) + using (var outputOrtValues = new DisposableListTest>(session.OutputMetadata.Count)) + { + var tensor = SystemNumericsTensors.Tensor.Create(inputData, Array.ConvertAll(inputTensor.Dimensions.ToArray(), x => (nint)x)); + + inputOrtValues.Add(new DisposableTestPair("data_0", OrtValue.CreateTensorValueFromSystemNumericsTensorObject(tensor))); + outputOrtValues.Add(new DisposableTestPair("softmaxout_1", OrtValue.CreateTensorValueFromSystemNumericsTensorObject(outputTensor))); + OrtValue[] outputs = []; + var ex = Assert.Throws(() => session.Run(runOptions, ["data_0"], [inputOrtValues[0].Value], ["softmaxout_1"], outputs)); + Assert.StartsWith("Length of outputNames (1) must match that of outputValues (0).", ex.Message); + } + } +#pragma warning restore SYSLIB5001 // System.Numerics.Tensors is only in preview so we can continue receiving API feedback +#endif + + #if USE_CUDA [Fact(DisplayName = "TestCUDAProviderOptions")] private void TestCUDAProviderOptions() @@ -1416,6 +1608,25 @@ private void VerifyNativeMethodsExist() } } +#if NET8_0_OR_GREATER +#pragma warning disable SYSLIB5001 // Type is for evaluation purposes only and is subject to change or removal in future updates. Suppress this diagnostic to proceed. + private void ValidateRunResultData(SystemNumericsTensors.Tensor resultTensor, float[] expectedOutput, int[] expectedDimensions) + { + Assert.Equal(expectedDimensions.Length, resultTensor.Rank); + + var resultDimensions = resultTensor.Lengths; + for (int i = 0; i < expectedDimensions.Length; i++) + { + Assert.Equal(expectedDimensions[i], resultDimensions[i]); + } + + var resultArray = resultTensor.ToArray(); + Assert.Equal(expectedOutput.Length, resultArray.Length); + Assert.Equal(expectedOutput, resultArray, new FloatComparer()); + } +#pragma warning restore SYSLIB5001 // Type is for evaluation purposes only and is subject to change or removal in future updates. Suppress this diagnostic to proceed. 
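Throughout these tests the inputs are wrapped via CreateTensorValueFromSystemNumericsTensorObject, which only pins tensors whose layout is contiguous and dense: the innermost stride must be 1, and every other stride must equal the product of the lengths to its right. A standalone restatement of that rule follows; the LooksDense helper is hypothetical, not part of the diff or the ONNX Runtime API.

```csharp
using System;

// Mirrors the dense-layout rule of IsContiguousAndDense: the innermost stride
// must be 1, and each outer stride must equal the product of the lengths of
// all dimensions to its right.
static bool LooksDense(nint[] lengths, nint[] strides)
{
    if (strides[^1] != 1) return false;      // innermost dimension must be packed
    nint expected = 1;
    for (int i = lengths.Length - 1; i > 0; i--)
    {
        expected *= lengths[i];              // product of lengths right of dim i-1
        if (strides[i - 1] != expected) return false;
    }
    return true;
}

// A freshly created 2x3 tensor has strides {3, 1} and is dense, so the factory
// pins it directly. A strided view, e.g. every other row of a 4x3 buffer
// (lengths {2, 3}, strides {6, 1}), fails the check and is first copied into
// a packed tensor before pinning.
Console.WriteLine(LooksDense(new nint[] { 2, 3 }, new nint[] { 3, 1 })); // True
Console.WriteLine(LooksDense(new nint[] { 2, 3 }, new nint[] { 6, 1 })); // False
```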
+#endif + static string GetTestModelsDir() { // get build directory, append downloaded models location diff --git a/csharp/tools/MauiModelTester/create_test_data.py b/csharp/tools/MauiModelTester/create_test_data.py index 6c57c71f94216..d73fd950a7bc0 100644 --- a/csharp/tools/MauiModelTester/create_test_data.py +++ b/csharp/tools/MauiModelTester/create_test_data.py @@ -2,7 +2,6 @@ import shutil import sys from pathlib import Path -from typing import Dict, List, Optional import numpy as np @@ -84,7 +83,7 @@ def parse_args(): return args -def create_existing_data_map(pb_files: List[Path]): +def create_existing_data_map(pb_files: list[Path]): import onnx_test_data_utils as data_utils data_map = {} @@ -98,9 +97,9 @@ def create_existing_data_map(pb_files: List[Path]): def add_model_and_test_data_to_app( model_path: Path, - symbolic_dims: Optional[Dict[str, int]] = None, - input_map: Optional[Dict[str, np.ndarray]] = None, - output_map: Optional[Dict[str, np.ndarray]] = None, + symbolic_dims: dict[str, int] | None = None, + input_map: dict[str, np.ndarray] | None = None, + output_map: dict[str, np.ndarray] | None = None, ): import ort_test_dir_utils as utils diff --git a/csharp/tools/linux_pack/LinuxPackNativeNuget.csproj b/csharp/tools/linux_pack/LinuxPackNativeNuget.csproj index 098078d2e3683..b814f99b05ae1 100644 --- a/csharp/tools/linux_pack/LinuxPackNativeNuget.csproj +++ b/csharp/tools/linux_pack/LinuxPackNativeNuget.csproj @@ -7,7 +7,7 @@ If you need a more sophisticated package for testing, you can run the production packaging pipeline against your branch and download the resulting nuget package from the build artifacts. --> - + netstandard2.0 $(OnnxRuntimeBuildDirectory)/NativeNuget.nuspec diff --git a/dockerfiles/Dockerfile.cuda b/dockerfiles/Dockerfile.cuda index 40f11dca623a7..db1a92a27a5bd 100644 --- a/dockerfiles/Dockerfile.cuda +++ b/dockerfiles/Dockerfile.cuda @@ -12,7 +12,9 @@ ARG OS=ubuntu24.04 FROM nvcr.io/nvidia/cuda:${CUDA_VERSION}-devel-${OS} ARG CUDA_VERSION ARG CUDNN_VERSION -ARG CMAKE_CUDA_ARCHITECTURES="61;70;75;80;86;90" +# Adjust as needed +# Check your CUDA arch: https://developer.nvidia.com/cuda-gpus +ARG CMAKE_CUDA_ARCHITECTURES="75;80;90" ENV DEBIAN_FRONTEND=noninteractive diff --git a/dockerfiles/Dockerfile.tensorrt b/dockerfiles/Dockerfile.tensorrt index 24947df6308a6..c1fdf73e0fea7 100644 --- a/dockerfiles/Dockerfile.tensorrt +++ b/dockerfiles/Dockerfile.tensorrt @@ -10,7 +10,9 @@ FROM nvcr.io/nvidia/tensorrt:${TRT_CONTAINER_VERSION}-py3 ARG ONNXRUNTIME_REPO=https://github.com/Microsoft/onnxruntime ARG ONNXRUNTIME_BRANCH=main -ARG CMAKE_CUDA_ARCHITECTURES=37;50;52;60;61;70;75;80 +# Adjust as needed +# Check your CUDA arch: https://developer.nvidia.com/cuda-gpus +ARG CMAKE_CUDA_ARCHITECTURES=75;80;90 RUN apt-get update &&\ apt-get install -y sudo git bash unattended-upgrades diff --git a/docs/How_To_Update_ONNX_Dev_Notes.md b/docs/How_To_Update_ONNX_Dev_Notes.md index 199e6671f6a1a..997812d7e7acf 100644 --- a/docs/How_To_Update_ONNX_Dev_Notes.md +++ b/docs/How_To_Update_ONNX_Dev_Notes.md @@ -36,10 +36,10 @@ This file should be generated. See [cgmanifests/README](/cgmanifests/README.md) 1. If there is a build failure in stage "Check out of dated documents" in WebAssembly CI pipeline, update ONNX Runtime Web WebGL operator support document: - Make sure Node.js is installed (see [Prerequisites](../js/README.md#Prerequisites) for instructions). - - Follow step 1 in [js/Build](../js/README.md#Build-2) to install dependencies). 
+ - Follow [js/Build](../js/README.md#Build-2) to install dependencies. - Follow instructions in [Generate document](../js/README.md#Generating-Document) to update document. Commit changes applied to file `docs/operators.md`. -1. Usually some newly introduced tests will fail. Then you may need to update +2. Usually some newly introduced tests will fail. Then you may need to update - [onnxruntime/test/onnx/main.cc](/onnxruntime/test/onnx/main.cc) - [onnxruntime/test/providers/cpu/model_tests.cc](/onnxruntime/test/providers/cpu/model_tests.cc) - [csharp/test/Microsoft.ML.OnnxRuntime.Tests.NetCoreApp/InferenceTest.netcore.cs](/csharp/test/Microsoft.ML.OnnxRuntime.Tests.NetCoreApp/InferenceTest.netcore.cs) diff --git a/docs/OperatorKernels.md b/docs/OperatorKernels.md index eeb8ebb3ccefe..98dcb777422bc 100644 --- a/docs/OperatorKernels.md +++ b/docs/OperatorKernels.md @@ -19,11 +19,13 @@ Do not modify directly.* |**Operator Domain:** *ai.onnx*|||| |Abs|*in* X:**T**
*out* Y:**T**|13+|**T** = tensor(double), tensor(float), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| |||[6, 12]|**T** = tensor(double), tensor(float), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|Acos|*in* input:**T**
*out* output:**T**|7+|**T** = tensor(float)| -|Acosh|*in* input:**T**
*out* output:**T**|9+|**T** = tensor(float)| -|Add|*in* A:**T**
*in* B:**T**
*out* C:**T**|14+|**T** = tensor(double), tensor(float), tensor(int32), tensor(int64)| -|||13|**T** = tensor(double), tensor(float), tensor(int32), tensor(int64)| -|||[7, 12]|**T** = tensor(double), tensor(float), tensor(int32), tensor(int64)| +|Acos|*in* input:**T**
*out* output:**T**|22+|**T** = tensor(float)| +|||[7, 21]|**T** = tensor(float)| +|Acosh|*in* input:**T**
*out* output:**T**|22+|**T** = tensor(float)| +|||[9, 21]|**T** = tensor(float)| +|Add|*in* A:**T**
*in* B:**T**
*out* C:**T**|14+|**T** = tensor(double), tensor(float), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|||13|**T** = tensor(double), tensor(float), tensor(int32), tensor(int64), tensor(uint32), tensor(uint64)| +|||[7, 12]|**T** = tensor(double), tensor(float), tensor(int32), tensor(int64), tensor(uint32), tensor(uint64)| |Affine|*in* X:**T**
*out* Y:**T**|1+|**T** = tensor(float)| |AffineGrid|*in* theta:**T1**
*in* size:**T2**
*out* grid:**T1**|20+|**T1** = tensor(double), tensor(float)
**T2** = tensor(int64)| |And|*in* A:**T**
*in* B:**T**
*out* C:**T1**|7+|**T** = tensor(bool)
**T1** = tensor(bool)| @@ -33,11 +35,16 @@ Do not modify directly.* |ArgMin|*in* data:**T**
*out* reduced:**tensor(int64)**|13+|**T** = tensor(double), tensor(float), tensor(int32), tensor(int64), tensor(int8), tensor(uint8)| |||[11, 12]|**T** = tensor(double), tensor(float), tensor(int32), tensor(int64), tensor(int8), tensor(uint8)| |||[1, 10]|**T** = tensor(double), tensor(float), tensor(int32), tensor(int64), tensor(int8), tensor(uint8)| -|Asin|*in* input:**T**
*out* output:**T**|7+|**T** = tensor(float)| -|Asinh|*in* input:**T**
*out* output:**T**|9+|**T** = tensor(float)| -|Atan|*in* input:**T**
*out* output:**T**|7+|**T** = tensor(float)| -|Atanh|*in* input:**T**
*out* output:**T**|9+|**T** = tensor(float)| -|AveragePool|*in* X:**T**
*out* Y:**T**|19+|**T** = tensor(float)| +|Asin|*in* input:**T**
*out* output:**T**|22+|**T** = tensor(float)| +|||[7, 21]|**T** = tensor(float)| +|Asinh|*in* input:**T**
*out* output:**T**|22+|**T** = tensor(float)| +|||[9, 21]|**T** = tensor(float)| +|Atan|*in* input:**T**
*out* output:**T**|22+|**T** = tensor(float)| +|||[7, 21]|**T** = tensor(float)| +|Atanh|*in* input:**T**
*out* output:**T**|22+|**T** = tensor(float)| +|||[9, 21]|**T** = tensor(float)| +|AveragePool|*in* X:**T**
*out* Y:**T**|22+|**T** = tensor(float)| +|||[19, 21]|**T** = tensor(float)| |||[11, 18]|**T** = tensor(float)| |||10|**T** = tensor(float)| |||[7, 9]|**T** = tensor(float)| @@ -72,37 +79,44 @@ Do not modify directly.* |ConstantOfShape|*in* input:**T1**
*out* output:**T2**|21+|**T1** = tensor(int64)
**T2** = tensor(bfloat16), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(float8e4m3fn), tensor(float8e4m3fnuz), tensor(float8e5m2), tensor(float8e5m2fnuz), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| |||20|**T1** = tensor(int64)
**T2** = tensor(bfloat16), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(float8e4m3fn), tensor(float8e4m3fnuz), tensor(float8e5m2), tensor(float8e5m2fnuz), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| |||[9, 19]|**T1** = tensor(int64)
**T2** = tensor(bfloat16), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|Conv|*in* X:**T**
*in* W:**T**
*in* B:**T**
*out* Y:**T**|11+|**T** = tensor(float)| +|Conv|*in* X:**T**
*in* W:**T**
*in* B:**T**
*out* Y:**T**|22+|**T** = tensor(float)| +|||[11, 21]|**T** = tensor(float)| |||[1, 10]|**T** = tensor(float)| |ConvInteger|*in* x:**T1**
*in* w:**T2**
*in* x_zero_point:**T1**
*in* w_zero_point:**T2**
*out* y:**T3**|10+|**T1** = tensor(uint8)
**T2** = tensor(uint8)
**T3** = tensor(int32)| -|ConvTranspose|*in* X:**T**
*in* W:**T**
*in* B:**T**
*out* Y:**T**|11+|**T** = tensor(float)| +|ConvTranspose|*in* X:**T**
*in* W:**T**
*in* B:**T**
*out* Y:**T**|22+|**T** = tensor(float)| +|||[11, 21]|**T** = tensor(float)| |||[1, 10]|**T** = tensor(float)| -|Cos|*in* input:**T**
*out* output:**T**|7+|**T** = tensor(float)| -|Cosh|*in* input:**T**
*out* output:**T**|9+|**T** = tensor(float)| +|Cos|*in* input:**T**
*out* output:**T**|22+|**T** = tensor(float)| +|||[7, 21]|**T** = tensor(float)| +|Cosh|*in* input:**T**
*out* output:**T**|22+|**T** = tensor(float)| +|||[9, 21]|**T** = tensor(float)| |Crop|*in* input:**T**
*out* output:**T**|1+|**T** = tensor(float)| |CumSum|*in* x:**T**
*in* axis:**T2**
*out* y:**T**|14+|**T** = tensor(double), tensor(float), tensor(int32), tensor(int64)
**T2** = tensor(int32), tensor(int64)| |||[11, 13]|**T** = tensor(double), tensor(float), tensor(int32), tensor(int64)
**T2** = tensor(int32), tensor(int64)| |DFT|*in* input:**T1**
*in* dft_length:**T2**
*in* axis:**tensor(int64)**
*out* output:**T1**

or

*in* input:**T1**
*in* dft_length:**T2**
*out* output:**T1**|20+|**T1** = tensor(double), tensor(float)
**T2** = tensor(int32), tensor(int64)| |||[17, 19]|**T1** = tensor(double), tensor(float)
**T2** = tensor(int32), tensor(int64)| -|DepthToSpace|*in* input:**T**
*out* output:**T**|13+|**T** = tensor(double), tensor(float)| -|||[11, 12]|**T** = tensor(double), tensor(float)| +|DepthToSpace|*in* input:**T**
*out* output:**T**|13+|**T** = tensor(double), tensor(float), tensor(uint8)| +|||[11, 12]|**T** = tensor(double), tensor(float), tensor(uint8)| |||[1, 10]|**T** = tensor(double), tensor(float)| |DequantizeLinear|*in* x:**T**
*in* x_scale:**tensor(float)**
*in* x_zero_point:**T**
*out* y:**tensor(float)**

or

*in* x:**T1**
*in* x_scale:**T2**
*in* x_zero_point:**T1**
*out* y:**T2**|21+|**T1** = tensor(float8e4m3fn), tensor(float8e4m3fnuz), tensor(float8e5m2), tensor(float8e5m2fnuz), tensor(int16), tensor(int32), tensor(int4), tensor(int8), tensor(uint16), tensor(uint4), tensor(uint8)
**T2** = tensor(float), tensor(float16)| |||[19, 20]|**T1** = tensor(float8e4m3fn), tensor(float8e4m3fnuz), tensor(float8e5m2), tensor(float8e5m2fnuz), tensor(int32), tensor(int8), tensor(uint8)
**T2** = tensor(float), tensor(float16)| |||[13, 18]|**T** = tensor(int32), tensor(int8), tensor(uint8)| |||[10, 12]|**T** = tensor(int32), tensor(int8), tensor(uint8)| -|Det|*in* X:**T**
*out* Y:**T**|11+|**T** = tensor(float)| -|Div|*in* A:**T**
*in* B:**T**
*out* C:**T**|14+|**T** = tensor(double), tensor(float), tensor(int32), tensor(int64)| -|||13|**T** = tensor(double), tensor(float), tensor(int32), tensor(int64)| -|||[7, 12]|**T** = tensor(double), tensor(float), tensor(int32), tensor(int64)| -|Dropout|*in* data:**T**
*in* ratio:**T1**
*in* training_mode:**T2**
*out* output:**T**
*out* mask:**T2**

or

*in* data:**T**
*out* output:**T**
*out* mask:**T**

or

*in* data:**T**
*out* output:**T**
*out* mask:**T1**|13+|**T** = tensor(double), tensor(float)
**T1** = tensor(double), tensor(float)
**T2** = tensor(bool)| +|Det|*in* X:**T**
*out* Y:**T**|22+|**T** = tensor(float)| +|||[11, 21]|**T** = tensor(float)| +|Div|*in* A:**T**
*in* B:**T**
*out* C:**T**|14+|**T** = tensor(double), tensor(float), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|||13|**T** = tensor(double), tensor(float), tensor(int32), tensor(int64), tensor(uint32), tensor(uint64)| +|||[7, 12]|**T** = tensor(double), tensor(float), tensor(int32), tensor(int64), tensor(uint32), tensor(uint64)| +|Dropout|*in* data:**T**
*in* ratio:**T1**
*in* training_mode:**T2**
*out* output:**T**
*out* mask:**T2**

or

*in* data:**T**
*out* output:**T**
*out* mask:**T**

or

*in* data:**T**
*out* output:**T**
*out* mask:**T1**|22+|**T** = tensor(double), tensor(float)
**T1** = tensor(double), tensor(float)
**T2** = tensor(bool)| +|||[13, 21]|**T** = tensor(double), tensor(float)
**T1** = tensor(double), tensor(float)
**T2** = tensor(bool)| |||12|**T** = tensor(double), tensor(float)
**T1** = tensor(double), tensor(float)
**T2** = tensor(bool)| |||[10, 11]|**T** = tensor(double), tensor(float), tensor(float16)
**T1** = tensor(bool)| |||[7, 9]|**T** = tensor(double), tensor(float), tensor(float16)| |DynamicQuantizeLinear|*in* x:**T1**
*out* y:**T2**
*out* y_scale:**tensor(float)**
*out* y_zero_point:**T2**|11+|**T2** = tensor(uint8)| |DynamicSlice|*in* data:**T**
*in* starts:**Tind**
*in* ends:**Tind**
*in* axes:**Tind**
*out* output:**T**|1+|**T** = tensor(bfloat16), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(string), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)
**Tind** = tensor(int32), tensor(int64)| |Einsum|*in* Inputs:**T**
*out* Output:**T**|12+|**T** = tensor(double), tensor(float), tensor(int32), tensor(int64)| -|Elu|*in* X:**T**
*out* Y:**T**|6+|**T** = tensor(float)| +|Elu|*in* X:**T**
*out* Y:**T**|22+|**T** = tensor(float)| +|||[6, 21]|**T** = tensor(float)| |Equal|*in* A:**T**
*in* B:**T**
*out* C:**T1**|19+|**T** = tensor(bool), tensor(double), tensor(float), tensor(int32), tensor(int64), tensor(string)
**T1** = tensor(bool)| |||[13, 18]|**T** = tensor(bool), tensor(double), tensor(float), tensor(int32), tensor(int64)
**T1** = tensor(bool)| |||[11, 12]|**T** = tensor(bool), tensor(double), tensor(float), tensor(int32), tensor(int64)
**T1** = tensor(bool)| @@ -113,7 +127,8 @@ Do not modify directly.* |||[6, 12]|**T** = tensor(double), tensor(float)| |Expand|*in* input:**T**
*in* shape:**tensor(int64)**
*out* output:**T**|13+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(string), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| |||[8, 12]|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(string), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|EyeLike|*in* input:**T1**
*out* output:**T2**|9+|**T1** = tensor(double), tensor(float), tensor(int32), tensor(int64), tensor(uint64)
**T2** = tensor(double), tensor(float), tensor(int32), tensor(int64), tensor(uint64)| +|EyeLike|*in* input:**T1**
*out* output:**T2**|22+|**T1** = tensor(double), tensor(float), tensor(int32), tensor(int64), tensor(uint64)
**T2** = tensor(double), tensor(float), tensor(int32), tensor(int64), tensor(uint64)| +|||[9, 21]|**T1** = tensor(double), tensor(float), tensor(int32), tensor(int64), tensor(uint64)
**T2** = tensor(double), tensor(float), tensor(int32), tensor(int64), tensor(uint64)| |Flatten|*in* input:**T**
*out* output:**T**|21+|**T** = tensor(bfloat16), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(string), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| |||[13, 20]|**T** = tensor(bfloat16), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(string), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| |||[11, 12]|**T** = tensor(bfloat16), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(string), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| @@ -121,7 +136,8 @@ Do not modify directly.* |||[1, 8]|**T** = tensor(bfloat16), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(string), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| |Floor|*in* X:**T**
*out* Y:**T**|13+|**T** = tensor(double), tensor(float)| |||[6, 12]|**T** = tensor(double), tensor(float)| -|GRU|*in* X:**T**
*in* W:**T**
*in* R:**T**
*in* B:**T**
*in* sequence_lens:**T1**
*in* initial_h:**T**
*out* Y:**T**
*out* Y_h:**T**|14+|**T** = tensor(double), tensor(float)
**T1** = tensor(int32)| +|GRU|*in* X:**T**
*in* W:**T**
*in* R:**T**
*in* B:**T**
*in* sequence_lens:**T1**
*in* initial_h:**T**
*out* Y:**T**
*out* Y_h:**T**|22+|**T** = tensor(double), tensor(float)
**T1** = tensor(int32)| +|||[14, 21]|**T** = tensor(double), tensor(float)
**T1** = tensor(int32)| |||[7, 13]|**T** = tensor(double), tensor(float)
**T1** = tensor(int32)| |Gather|*in* data:**T**
*in* indices:**Tind**
*out* output:**T**|13+|**T** = tensor(bfloat16), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(string), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)
**Tind** = tensor(int32), tensor(int64)| |||[11, 12]|**T** = tensor(bfloat16), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(string), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)
**Tind** = tensor(int32), tensor(int64)| @@ -136,19 +152,23 @@ Do not modify directly.* |||[11, 12]|**T** = tensor(double), tensor(float)| |||[9, 10]|**T** = tensor(double), tensor(float)| |||[7, 8]|**T** = tensor(double), tensor(float)| -|GlobalAveragePool|*in* X:**T**
*out* Y:**T**|1+|**T** = tensor(float)| +|GlobalAveragePool|*in* X:**T**
*out* Y:**T**|22+|**T** = tensor(float)| +|||[1, 21]|**T** = tensor(float)| |GlobalLpPool|*in* X:**T**
*out* Y:**T**|2+|**T** = tensor(float)| -|GlobalMaxPool|*in* X:**T**
*out* Y:**T**|1+|**T** = tensor(float)| +|GlobalMaxPool|*in* X:**T**
*out* Y:**T**|22+|**T** = tensor(float)| +|||[1, 21]|**T** = tensor(float)| |Greater|*in* A:**T**
*in* B:**T**
*out* C:**T1**|13+|**T** = tensor(double), tensor(float), tensor(int32), tensor(int64)
**T1** = tensor(bool)| |||[9, 12]|**T** = tensor(double), tensor(float), tensor(int32), tensor(int64)
**T1** = tensor(bool)| |||[7, 8]|**T** = tensor(double), tensor(float)
**T1** = tensor(bool)| |GreaterOrEqual|*in* A:**T**
*in* B:**T**
*out* C:**T1**|16+|**T** = tensor(double), tensor(float), tensor(int32), tensor(int64)
**T1** = tensor(bool)| |||[12, 15]|**T** = tensor(double), tensor(float), tensor(int32), tensor(int64)
**T1** = tensor(bool)| -|GridSample|*in* X:**T1**
*in* grid:**T2**
*out* Y:**T1**|20+|**T1** = tensor(double), tensor(float)
**T2** = tensor(double), tensor(float)| +|GridSample|*in* X:**T1**
*in* grid:**T2**
*out* Y:**T1**|22+|**T1** = tensor(double), tensor(float)
**T2** = tensor(double), tensor(float)| +|||[20, 21]|**T1** = tensor(double), tensor(float)
**T2** = tensor(double), tensor(float)| |||[16, 19]|**T1** = tensor(float)
**T2** = tensor(float)| |HammingWindow|*in* size:**T1**
*out* output:**T2**|17+|**T1** = tensor(int32), tensor(int64)
**T2** = tensor(double), tensor(float), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| |HannWindow|*in* size:**T1**
*out* output:**T2**|17+|**T1** = tensor(int32), tensor(int64)
**T2** = tensor(double), tensor(float), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|HardSigmoid|*in* X:**T**
*out* Y:**T**|6+|**T** = tensor(float)| +|HardSigmoid|*in* X:**T**
*out* Y:**T**|22+|**T** = tensor(float)| +|||[6, 21]|**T** = tensor(float)| |Hardmax|*in* input:**T**
*out* output:**T**|13+|**T** = tensor(float)| |||[11, 12]|**T** = tensor(float)| |||[1, 10]|**T** = tensor(float)| @@ -165,7 +185,8 @@ Do not modify directly.* |||[11, 12]|**B** = tensor(bool)
**V** = tensor(bfloat16), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(string), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| |||[1, 10]|**B** = tensor(bool)
**V** = tensor(bfloat16), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(string), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| |ImageScaler|*in* input:**T**
*out* output:**T**|1+|**T** = tensor(float)| -|InstanceNormalization|*in* input:**T**
*in* scale:**T**
*in* B:**T**
*out* output:**T**|6+|**T** = tensor(float)| +|InstanceNormalization|*in* input:**T**
*in* scale:**T**
*in* B:**T**
*out* output:**T**|22+|**T** = tensor(float)| +|||[6, 21]|**T** = tensor(float)| |IsInf|*in* X:**T1**
*out* Y:**T2**|20+|**T1** = tensor(bfloat16), tensor(double), tensor(float), tensor(float16), tensor(float8e4m3fn), tensor(float8e4m3fnuz), tensor(float8e5m2), tensor(float8e5m2fnuz)
**T2** = tensor(bool)| |||[10, 19]|**T1** = tensor(double), tensor(float)
**T2** = tensor(bool)| |IsNaN|*in* X:**T1**
*out* Y:**T2**|20+|**T1** = tensor(bfloat16), tensor(double), tensor(float), tensor(float16), tensor(float8e4m3fn), tensor(float8e4m3fnuz), tensor(float8e5m2), tensor(float8e5m2fnuz)
**T2** = tensor(bool)| @@ -173,7 +194,8 @@ Do not modify directly.* |||[9, 12]|**T1** = tensor(double), tensor(float), tensor(float16)
**T2** = tensor(bool)| |LRN|*in* X:**T**
*out* Y:**T**|13+|**T** = tensor(float)| |||[1, 12]|**T** = tensor(float)| -|LSTM|*in* X:**T**
*in* W:**T**
*in* R:**T**
*in* B:**T**
*in* sequence_lens:**T1**
*in* initial_h:**T**
*in* initial_c:**T**
*in* P:**T**
*out* Y:**T**
*out* Y_h:**T**
*out* Y_c:**T**|14+|**T** = tensor(double), tensor(float)
**T1** = tensor(int32)| +|LSTM|*in* X:**T**
*in* W:**T**
*in* R:**T**
*in* B:**T**
*in* sequence_lens:**T1**
*in* initial_h:**T**
*in* initial_c:**T**
*in* P:**T**
*out* Y:**T**
*out* Y_h:**T**
*out* Y_c:**T**|22+|**T** = tensor(double), tensor(float)
**T1** = tensor(int32)| +|||[14, 21]|**T** = tensor(double), tensor(float)
**T1** = tensor(int32)| |||[7, 13]|**T** = tensor(double), tensor(float)
**T1** = tensor(int32)| |LayerNormalization|*in* X:**T**
*in* Scale:**T**
*in* B:**T**
*out* Y:**T**
*out* Mean:**U**
*out* InvStdDev:**U**

or

*in* X:**T**
*in* Scale:**V**
*in* B:**V**
*out* Y:**V**
*out* Mean:**U**
*out* InvStdDev:**U**|17+|**T** = tensor(double), tensor(float), tensor(float16)
**U** = tensor(float)| |||[1, 16]|**T** = tensor(double), tensor(float), tensor(float16)
**U** = tensor(double), tensor(float), tensor(float16)
**V** = tensor(double), tensor(float), tensor(float16)| @@ -196,7 +218,8 @@ Do not modify directly.* |||[11, 12]|**B** = tensor(bool)
**I** = tensor(int64)
**V** = tensor(bfloat16), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(string), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| |||[1, 10]|**B** = tensor(bool)
**I** = tensor(int64)
**V** = tensor(bfloat16), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(string), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| |LpNormalization|*in* input:**T**
*out* output:**T**|1+|**T** = tensor(double), tensor(float)| -|LpPool|*in* X:**T**
*out* Y:**T**|18+|**T** = tensor(float)| +|LpPool|*in* X:**T**
*out* Y:**T**|22+|**T** = tensor(float)| +|||[18, 21]|**T** = tensor(float)| |||[11, 17]|**T** = tensor(float)| |||[2, 10]|**T** = tensor(float)| |MatMul|*in* A:**T**
*in* B:**T**
*out* Y:**T**|13+|**T** = tensor(double), tensor(float), tensor(int32), tensor(int64), tensor(uint32), tensor(uint64)| @@ -207,11 +230,13 @@ Do not modify directly.* |||12|**T** = tensor(double), tensor(float), tensor(float16), tensor(int32), tensor(int64), tensor(uint32), tensor(uint64)| |||[8, 11]|**T** = tensor(double), tensor(float)| |||[6, 7]|**T** = tensor(float)| -|MaxPool|*in* X:**T**
*out* Y:**T**

or

*in* X:**T**
*out* Y:**T**
*out* Indices:**I**|12+|**I** = tensor(int64)
**T** = tensor(double), tensor(float), tensor(int8), tensor(uint8)| +|MaxPool|*in* X:**T**
*out* Y:**T**

or

*in* X:**T**
*out* Y:**T**
*out* Indices:**I**|22+|**I** = tensor(int64)
**T** = tensor(double), tensor(float), tensor(int8), tensor(uint8)| +|||[12, 21]|**I** = tensor(int64)
**T** = tensor(double), tensor(float), tensor(int8), tensor(uint8)| |||[8, 11]|**I** = tensor(int64)
**T** = tensor(double), tensor(float)| |||[1, 7]|**T** = tensor(float)| |MaxRoiPool|*in* X:**T**
*in* rois:**T**
*out* Y:**T**|1+|**T** = tensor(float)| -|MaxUnpool|*in* X:**T1**
*in* I:**T2**
*in* output_shape:**T2**
*out* output:**T1**|11+|**T1** = tensor(float)
**T2** = tensor(int64)| +|MaxUnpool|*in* X:**T1**
*in* I:**T2**
*in* output_shape:**T2**
*out* output:**T1**|22+|**T1** = tensor(float)
**T2** = tensor(int64)| +|||[11, 21]|**T1** = tensor(float)
**T2** = tensor(int64)| |||[9, 10]|**T1** = tensor(float)
**T2** = tensor(int64)| |Mean|*in* data_0:**T**
*out* mean:**T**|13+|**T** = tensor(float)| |||[8, 12]|**T** = tensor(float)| @@ -226,12 +251,12 @@ Do not modify directly.* |||[6, 7]|**T** = tensor(float)| |Mod|*in* A:**T**
*in* B:**T**
*out* C:**T**|13+|**T** = tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| |||[10, 12]|**T** = tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|Mul|*in* A:**T**
*in* B:**T**
*out* C:**T**|14+|**T** = tensor(double), tensor(float), tensor(int32), tensor(int64)| -|||13|**T** = tensor(double), tensor(float), tensor(int32), tensor(int64)| -|||[7, 12]|**T** = tensor(double), tensor(float), tensor(int32), tensor(int64)| +|Mul|*in* A:**T**
*in* B:**T**
*out* C:**T**|14+|**T** = tensor(double), tensor(float), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|||13|**T** = tensor(double), tensor(float), tensor(int32), tensor(int64), tensor(uint32), tensor(uint64)| +|||[7, 12]|**T** = tensor(double), tensor(float), tensor(int32), tensor(int64), tensor(uint32), tensor(uint64)| |Multinomial|*in* input:**T1**
*out* output:**T2**|7+|**T1** = tensor(float)
**T2** = tensor(int32), tensor(int64)| -|Neg|*in* X:**T**
*out* Y:**T**|13+|**T** = tensor(double), tensor(float), tensor(int32), tensor(int64), tensor(int8)| -|||[6, 12]|**T** = tensor(double), tensor(float), tensor(int32), tensor(int64), tensor(int8)| +|Neg|*in* X:**T**
*out* Y:**T**|13+|**T** = tensor(double), tensor(float), tensor(int16), tensor(int32), tensor(int64), tensor(int8)| +|||[6, 12]|**T** = tensor(double), tensor(float), tensor(int16), tensor(int32), tensor(int64), tensor(int8)| |NonZero|*in* X:**T**
*out* Y:**tensor(int64)**|13+|**T** = tensor(bool), tensor(float), tensor(int32), tensor(int64), tensor(uint8)| |||[9, 12]|**T** = tensor(bool), tensor(float), tensor(int32), tensor(int64), tensor(uint8)| |Not|*in* X:**T**
*out* Y:**T**|1+|**T** = tensor(bool)| @@ -264,7 +289,8 @@ Do not modify directly.* |||[19, 20]|**T1** = tensor(float), tensor(float16)
**T2** = tensor(float8e4m3fn), tensor(float8e4m3fnuz), tensor(float8e5m2), tensor(float8e5m2fnuz), tensor(int8), tensor(uint8)| |||[13, 18]|**T1** = tensor(float)
**T2** = tensor(int8), tensor(uint8)| |||[10, 12]|**T1** = tensor(float)
**T2** = tensor(int8), tensor(uint8)| -|RNN|*in* X:**T**
*in* W:**T**
*in* R:**T**
*in* B:**T**
*in* sequence_lens:**T1**
*in* initial_h:**T**
*out* Y:**T**
*out* Y_h:**T**|14+|**T** = tensor(float)
**T1** = tensor(int32)| +|RNN|*in* X:**T**
*in* W:**T**
*in* R:**T**
*in* B:**T**
*in* sequence_lens:**T1**
*in* initial_h:**T**
*out* Y:**T**
*out* Y_h:**T**|22+|**T** = tensor(float)
**T1** = tensor(int32)| +|||[14, 21]|**T** = tensor(float)
**T1** = tensor(int32)| |||[7, 13]|**T** = tensor(float)
**T1** = tensor(int32)| |RandomNormal|*out* output:**T**|1+|**T** = tensor(double), tensor(float)| |RandomNormalLike|*in* input:**T1**
*out* output:**T2**|1+|**T1** = tensor(bfloat16), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(string), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)
**T2** = tensor(double), tensor(float)| @@ -334,7 +360,8 @@ Do not modify directly.* |ReverseSequence|*in* input:**T**
*in* sequence_lens:**tensor(int64)**
*out* Y:**T**|10+|**T** = tensor(bfloat16), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(string), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| |RoiAlign|*in* X:**T1**
*in* rois:**T1**
*in* batch_indices:**T2**
*out* Y:**T1**|16+|**T1** = tensor(double), tensor(float)
**T2** = tensor(int64)| |||[10, 15]|**T1** = tensor(double), tensor(float)
**T2** = tensor(int64)| -|Round|*in* X:**T**
*out* Y:**T**|11+|**T** = tensor(double), tensor(float), tensor(float16)| +|Round|*in* X:**T**
*out* Y:**T**|22+|**T** = tensor(double), tensor(float), tensor(float16)| +|||[11, 21]|**T** = tensor(double), tensor(float), tensor(float16)| |STFT|*in* signal:**T1**
*in* frame_step:**T2**
*in* window:**T1**
*in* frame_length:**T2**
*out* output:**T1**|17+|**T1** = tensor(double), tensor(float)
**T2** = tensor(int32), tensor(int64)| |Scale|*in* input:**T**
*out* output:**T**|1+|**T** = tensor(float)| |ScaledTanh|*in* input:**T**
*out* output:**T**|1+|**T** = tensor(float)| @@ -353,7 +380,8 @@ Do not modify directly.* |||[16, 17]|**T** = tensor(bfloat16), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(string), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| |||[13, 15]|**T** = tensor(bfloat16), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(string), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| |||[11, 12]|**T** = tensor(bfloat16), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(string), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|Selu|*in* X:**T**
*out* Y:**T**|6+|**T** = tensor(float)| +|Selu|*in* X:**T**
*out* Y:**T**|22+|**T** = tensor(float)| +|||[6, 21]|**T** = tensor(float)| |SequenceAt|*in* input_sequence:**S**
*in* position:**I**
*out* tensor:**T**|11+|**I** = tensor(int32), tensor(int64)
**S** = seq(tensor(bfloat16)), seq(tensor(bool)), seq(tensor(double)), seq(tensor(float)), seq(tensor(float16)), seq(tensor(int16)), seq(tensor(int32)), seq(tensor(int64)), seq(tensor(int8)), seq(tensor(string)), seq(tensor(uint16)), seq(tensor(uint32)), seq(tensor(uint64)), seq(tensor(uint8))
**T** = tensor(bfloat16), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(string), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| |SequenceConstruct|*in* inputs:**T**
*out* output_sequence:**S**|11+|**S** = seq(tensor(bfloat16)), seq(tensor(bool)), seq(tensor(double)), seq(tensor(float)), seq(tensor(float16)), seq(tensor(int16)), seq(tensor(int32)), seq(tensor(int64)), seq(tensor(int8)), seq(tensor(string)), seq(tensor(uint16)), seq(tensor(uint32)), seq(tensor(uint64)), seq(tensor(uint8))
**T** = tensor(bfloat16), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(string), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| |SequenceEmpty|*out* output:**S**|11+|**S** = seq(tensor(bfloat16)), seq(tensor(bool)), seq(tensor(double)), seq(tensor(float)), seq(tensor(float16)), seq(tensor(int16)), seq(tensor(int32)), seq(tensor(int64)), seq(tensor(int8)), seq(tensor(string)), seq(tensor(uint16)), seq(tensor(uint32)), seq(tensor(uint64)), seq(tensor(uint8))| @@ -371,8 +399,10 @@ Do not modify directly.* |Sign|*in* input:**T**
*out* output:**T**|13+|**T** = tensor(bfloat16), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| |||[9, 12]|**T** = tensor(bfloat16), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| |SimplifiedLayerNormalization|*in* X:**T**
*in* scale:**V**
*out* Y:**V**
*out* inv_std_var:**U**|1+|**T** = tensor(double), tensor(float), tensor(float16)
**U** = tensor(double), tensor(float), tensor(float16)
**V** = tensor(double), tensor(float), tensor(float16)| -|Sin|*in* input:**T**
*out* output:**T**|7+|**T** = tensor(double), tensor(float)| -|Sinh|*in* input:**T**
*out* output:**T**|9+|**T** = tensor(float)| +|Sin|*in* input:**T**
*out* output:**T**|22+|**T** = tensor(double), tensor(float)| +|||[7, 21]|**T** = tensor(double), tensor(float)| +|Sinh|*in* input:**T**
*out* output:**T**|22+|**T** = tensor(float)| +|||[9, 21]|**T** = tensor(float)| |Size|*in* data:**T**
*out* size:**T1**|21+|**T** = tensor(bool), tensor(double), tensor(float), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(string), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)
**T1** = tensor(int64)| |||[19, 20]|**T** = tensor(bool), tensor(double), tensor(float), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(string), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)
**T1** = tensor(int64)| |||[13, 18]|**T** = tensor(bool), tensor(double), tensor(float), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(string), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)
**T1** = tensor(int64)| @@ -384,8 +414,10 @@ Do not modify directly.* |Softmax|*in* input:**T**
*out* output:**T**|13+|**T** = tensor(double), tensor(float)| |||[11, 12]|**T** = tensor(double), tensor(float)| |||[1, 10]|**T** = tensor(double), tensor(float)| -|Softplus|*in* X:**T**
*out* Y:**T**|1+|**T** = tensor(float)| -|Softsign|*in* input:**T**
*out* output:**T**|1+|**T** = tensor(float)| +|Softplus|*in* X:**T**
*out* Y:**T**|22+|**T** = tensor(float)| +|||[1, 21]|**T** = tensor(float)| +|Softsign|*in* input:**T**
*out* output:**T**|22+|**T** = tensor(float)| +|||[1, 21]|**T** = tensor(float)| |SpaceToDepth|*in* input:**T**
*out* output:**T**|13+|**T** = tensor(double), tensor(float)| |||[1, 12]|**T** = tensor(double), tensor(float)| |Split|*in* input:**T**
*in* split:**T**
*out* outputs...:**T**

or

*in* input:**T**
*in* split:**tensor(int64)**
*out* outputs:**T**

or

*in* input:**T**
*out* outputs:**T**|18+|**T** = tensor(bfloat16), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(string), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| @@ -402,17 +434,19 @@ Do not modify directly.* |StringConcat|*in* X:**T**
*in* Y:**T**
*out* Z:**T**|20+|**T** = tensor(string)| |StringNormalizer|*in* X:**tensor(string)**
*out* Y:**tensor(string)**|10+|**X** = tensor(string)| |StringSplit|*in* X:**T1**
*out* Y:**T2**
*out* Z:**T3**|20+|**T1** = tensor(string)
**T2** = tensor(string)
**T3** = tensor(int64)| -|Sub|*in* A:**T**
*in* B:**T**
*out* C:**T**|14+|**T** = tensor(double), tensor(float), tensor(int32), tensor(int64)| -|||13|**T** = tensor(double), tensor(float), tensor(int32), tensor(int64)| -|||[7, 12]|**T** = tensor(double), tensor(float), tensor(int32), tensor(int64)| +|Sub|*in* A:**T**
*in* B:**T**
*out* C:**T**|14+|**T** = tensor(double), tensor(float), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|||13|**T** = tensor(double), tensor(float), tensor(int32), tensor(int64), tensor(uint32), tensor(uint64)| +|||[7, 12]|**T** = tensor(double), tensor(float), tensor(int32), tensor(int64), tensor(uint32), tensor(uint64)| |Sum|*in* data_0:**T**
*out* sum:**T**|13+|**T** = tensor(double), tensor(float)| |||[8, 12]|**T** = tensor(double), tensor(float)| |||[6, 7]|**T** = tensor(double), tensor(float)| -|Tan|*in* input:**T**
*out* output:**T**|7+|**T** = tensor(float)| +|Tan|*in* input:**T**
*out* output:**T**|22+|**T** = tensor(float)| +|||[7, 21]|**T** = tensor(float)| |Tanh|*in* input:**T**
*out* output:**T**|13+|**T** = tensor(double), tensor(float)| |||[6, 12]|**T** = tensor(double), tensor(float)| |TfIdfVectorizer|*in* X:**T**
*out* Y:**T1**|9+|**T** = tensor(int32), tensor(int64), tensor(string)
**T1** = tensor(float)| -|ThresholdedRelu|*in* X:**T**
*out* Y:**T**|10+|**T** = tensor(float)| +|ThresholdedRelu|*in* X:**T**
*out* Y:**T**|22+|**T** = tensor(float)| +|||[10, 21]|**T** = tensor(float)| |||[1, 9]|**T** = tensor(float)| |Tile|*in* input:**T**
*in* repeats:**T1**
*out* output:**T**

or

*in* input:**T**
*in* tiles:**T**
*in* axis:**T**
*out* output:**T**|13+|**T** = tensor(bool), tensor(double), tensor(float), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(string), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)
**T1** = tensor(int64)| |||[6, 12]|**T** = tensor(bool), tensor(double), tensor(float), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(string), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)
**T1** = tensor(int64)| @@ -550,7 +584,7 @@ Do not modify directly.* |**Operator Domain:** *ai.onnx*|||| |Abs|*in* X:**T**
*out* Y:**T**|13+|**T** = tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| |||[6, 12]|**T** = tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|Add|*in* A:**T**
*in* B:**T**
*out* C:**T**|14+|**T** = tensor(bfloat16), tensor(double), tensor(float), tensor(float16), tensor(int32), tensor(int64), tensor(uint32), tensor(uint64)| +|Add|*in* A:**T**
*in* B:**T**
*out* C:**T**|14+|**T** = tensor(bfloat16), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| |||13|**T** = tensor(bfloat16), tensor(double), tensor(float), tensor(float16), tensor(int32), tensor(int64), tensor(uint32), tensor(uint64)| |||[7, 12]|**T** = tensor(double), tensor(float), tensor(float16), tensor(int32), tensor(int64), tensor(uint32), tensor(uint64)| |Affine|*in* X:**T**
*out* Y:**T**|1+|**T** = tensor(double), tensor(float), tensor(float16)| @@ -600,7 +634,7 @@ Do not modify directly.* |||[19, 20]|**T1** = tensor(float8e4m3fn), tensor(float8e5m2), tensor(int8), tensor(uint8)
**T2** = tensor(float), tensor(float16)| |||[13, 18]|**T** = tensor(int8), tensor(uint8)| |||[10, 12]|**T** = tensor(int8), tensor(uint8)| -|Div|*in* A:**T**
*in* B:**T**
*out* C:**T**|14+|**T** = tensor(bfloat16), tensor(double), tensor(float), tensor(float16), tensor(int32), tensor(int64), tensor(uint32), tensor(uint64)| +|Div|*in* A:**T**
*in* B:**T**
*out* C:**T**|14+|**T** = tensor(bfloat16), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| |||13|**T** = tensor(bfloat16), tensor(double), tensor(float), tensor(float16), tensor(int32), tensor(int64), tensor(uint32), tensor(uint64)| |||[7, 12]|**T** = tensor(double), tensor(float), tensor(float16), tensor(int32), tensor(int64), tensor(uint32), tensor(uint64)| |Dropout|*in* data:**T**
*in* ratio:**T1**
*in* training_mode:**T2**
*out* output:**T**
*out* mask:**T2**

or

*in* data:**T**
*out* output:**T**
*out* mask:**T**

or

*in* data:**T**
*out* output:**T**
*out* mask:**T1**|13+|**T** = tensor(bfloat16), tensor(double), tensor(float), tensor(float16)
**T1** = tensor(bfloat16), tensor(double), tensor(float), tensor(float16)
**T2** = tensor(bool)| @@ -706,7 +740,7 @@ Do not modify directly.* |||[6, 11]|**T** = tensor(bfloat16), tensor(double), tensor(float), tensor(float16)| |Mod|*in* A:**T**
*in* B:**T**
*out* C:**T**|13+|**T** = tensor(bfloat16), tensor(double), tensor(float), tensor(float16), tensor(int32), tensor(int64), tensor(uint32), tensor(uint64)| |||[10, 12]|**T** = tensor(double), tensor(float), tensor(float16), tensor(int32), tensor(int64), tensor(uint32), tensor(uint64)| -|Mul|*in* A:**T**
*in* B:**T**
*out* C:**T**|14+|**T** = tensor(bfloat16), tensor(double), tensor(float), tensor(float16), tensor(int32), tensor(int64), tensor(uint32), tensor(uint64)| +|Mul|*in* A:**T**
*in* B:**T**
*out* C:**T**|14+|**T** = tensor(bfloat16), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| |||13|**T** = tensor(bfloat16), tensor(double), tensor(float), tensor(float16), tensor(int32), tensor(int64), tensor(uint32), tensor(uint64)| |||[7, 12]|**T** = tensor(double), tensor(float), tensor(float16), tensor(int32), tensor(int64), tensor(uint32), tensor(uint64)| |Neg|*in* X:**T**
*out* Y:**T**|13+|**T** = tensor(bfloat16), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8)| @@ -830,7 +864,7 @@ Do not modify directly.* |Squeeze|*in* data:**T**
*in* axes:**tensor(int64)**
*out* squeezed:**T**

or

*in* data:**T**
*out* squeezed:**T**|13+|**T** = tensor(bfloat16), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| |||[11, 12]|**T** = tensor(bfloat16), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| |||[1, 10]|**T** = tensor(bfloat16), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|Sub|*in* A:**T**
*in* B:**T**
*out* C:**T**|14+|**T** = tensor(bfloat16), tensor(double), tensor(float), tensor(float16), tensor(int32), tensor(int64), tensor(uint32), tensor(uint64)| +|Sub|*in* A:**T**
*in* B:**T**
*out* C:**T**|14+|**T** = tensor(bfloat16), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| |||13|**T** = tensor(bfloat16), tensor(double), tensor(float), tensor(float16), tensor(int32), tensor(int64), tensor(uint32), tensor(uint64)| |||[7, 12]|**T** = tensor(double), tensor(float), tensor(float16), tensor(int32), tensor(int64), tensor(uint32), tensor(uint64)| |Sum|*in* data_0:**T**
*out* sum:**T**|13+|**T** = tensor(bfloat16), tensor(double), tensor(float), tensor(float16)| diff --git a/docs/python/_common/onnx_sphinx.py b/docs/python/_common/onnx_sphinx.py index 7562d23289d90..1b99dcf805530 100644 --- a/docs/python/_common/onnx_sphinx.py +++ b/docs/python/_common/onnx_sphinx.py @@ -2,6 +2,7 @@ """ Automates the generation of ONNX operators. """ + import importlib import inspect import keyword @@ -281,7 +282,7 @@ def get_domain_list(): """ Returns the list of available domains. """ - return list(sorted(set(map(lambda s: s.domain, get_all_schemas_with_history())))) + return sorted({s.domain for s in get_all_schemas_with_history()}) def get_operator_schemas(op_name, version=None, domain=None): @@ -778,9 +779,9 @@ def render(self, indent=""): name = op["name"] dom = self.domain.replace(".", "-") table_dom.append(f" * - :ref:`l-onnx-doc{dom}-{name}`") - versions = list(reversed(sorted((k, v) for k, v in op["links"].items() if isinstance(k, int)))) + versions = sorted(((k, v) for k, v in op["links"].items() if isinstance(k, int)), reverse=True) col1 = ", ".join(f":ref:`{k} <{v}>`" for k, v in versions) - diffs = list(reversed(sorted((k, v) for k, v in op["links"].items() if isinstance(k, tuple)))) + diffs = sorted(((k, v) for k, v in op["links"].items() if isinstance(k, tuple)), reverse=True) col2 = ", ".join(f":ref:`{k[1]}/{k[0]} <{v}>`" for k, v in diffs) table_dom.append(f" - {col1}") table_dom.append(f" - {col2}") diff --git a/docs/python/examples/plot_backend.py b/docs/python/examples/plot_backend.py index 58fb4cd84f82f..65b5fd0cf70ee 100644 --- a/docs/python/examples/plot_backend.py +++ b/docs/python/examples/plot_backend.py @@ -14,6 +14,7 @@ Let's use the API to compute the prediction of a simple logistic regression model. """ + import numpy as np from onnx import load diff --git a/docs/python/examples/plot_common_errors.py b/docs/python/examples/plot_common_errors.py index dc7078831a257..85cfbf6b97abf 100644 --- a/docs/python/examples/plot_common_errors.py +++ b/docs/python/examples/plot_common_errors.py @@ -15,6 +15,7 @@ trained on *Iris* datasets. The model takes a vector of dimension 2 and returns a class among three. """ + import numpy import onnxruntime as rt diff --git a/docs/python/examples/plot_convert_pipeline_vectorizer.py b/docs/python/examples/plot_convert_pipeline_vectorizer.py index 06e9e8d29e5b8..2215cb73ee643 100644 --- a/docs/python/examples/plot_convert_pipeline_vectorizer.py +++ b/docs/python/examples/plot_convert_pipeline_vectorizer.py @@ -16,6 +16,7 @@ The first step consists in creating a dummy datasets. """ + import pandas from sklearn.datasets import make_regression from sklearn.model_selection import train_test_split diff --git a/docs/python/examples/plot_profiling.py b/docs/python/examples/plot_profiling.py index d35ef725562cf..6e575ec9eb4a1 100644 --- a/docs/python/examples/plot_profiling.py +++ b/docs/python/examples/plot_profiling.py @@ -11,6 +11,7 @@ *ONNX Runtime* can profile the execution of the model. This example shows how to interpret the results. 
""" + import numpy import onnx diff --git a/docs/python/examples/plot_train_convert_predict.py b/docs/python/examples/plot_train_convert_predict.py index 44b6bb74c29df..f0fd8694fb541 100644 --- a/docs/python/examples/plot_train_convert_predict.py +++ b/docs/python/examples/plot_train_convert_predict.py @@ -212,9 +212,9 @@ def sess_predict_proba_rf(x): rf.fit(X_train, y_train) initial_type = [("float_input", FloatTensorType([1, 4]))] onx = convert_sklearn(rf, initial_types=initial_type) - with open("rf_iris_%d.onnx" % n_trees, "wb") as f: + with open(f"rf_iris_{n_trees}.onnx", "wb") as f: f.write(onx.SerializeToString()) - sess = rt.InferenceSession("rf_iris_%d.onnx" % n_trees, providers=rt.get_available_providers()) + sess = rt.InferenceSession(f"rf_iris_{n_trees}.onnx", providers=rt.get_available_providers()) def sess_predict_proba_loop(x): return sess.run([prob_name], {input_name: x.astype(numpy.float32)})[0] # noqa: B023 diff --git a/include/onnxruntime/core/common/profiler_common.h b/include/onnxruntime/core/common/profiler_common.h index 0074d5e74a461..ab973256fe5f1 100644 --- a/include/onnxruntime/core/common/profiler_common.h +++ b/include/onnxruntime/core/common/profiler_common.h @@ -81,8 +81,8 @@ class EpProfiler { virtual ~EpProfiler() = default; virtual bool StartProfiling(TimePoint profiling_start_time) = 0; // called when profiling starts virtual void EndProfiling(TimePoint start_time, Events& events) = 0; // called when profiling ends, save all captures numbers to "events" - virtual void Start(uint64_t){}; // called before op start, accept an id as argument to identify the op - virtual void Stop(uint64_t){}; // called after op stop, accept an id as argument to identify the op + virtual void Start(uint64_t) {} // called before op start, accept an id as argument to identify the op + virtual void Stop(uint64_t) {} // called after op stop, accept an id as argument to identify the op }; // Demangle C++ symbols diff --git a/include/onnxruntime/core/framework/allocator.h b/include/onnxruntime/core/framework/allocator.h index 57b332ce65b93..523d2a9d1a8be 100644 --- a/include/onnxruntime/core/framework/allocator.h +++ b/include/onnxruntime/core/framework/allocator.h @@ -52,6 +52,7 @@ constexpr const char* OpenVINO_CPU = "OpenVINO_CPU"; constexpr const char* OpenVINO_GPU = "OpenVINO_GPU"; constexpr const char* OpenVINO_RT = "OpenVINO_RT"; constexpr const char* OpenVINO_RT_NPU = "OpenVINO_RT_NPU"; +constexpr const char* QNN_HTP_SHARED = "QnnHtpShared"; constexpr const char* WEBGPU_BUFFER = "WebGPU_Buffer"; constexpr const char* WEBNN_TENSOR = "WebNN_Tensor"; @@ -81,6 +82,10 @@ class IAllocator { */ virtual void* Alloc(size_t size) = 0; + /** + * Free memory at p. + * If p is nullptr, do nothing. 
+ */ virtual void Free(void* p) = 0; // Reserve() is an interface exposed for an implementation of IAllocator diff --git a/include/onnxruntime/core/framework/float16.h b/include/onnxruntime/core/framework/float16.h index dac0a01fbc3fe..97420ffe438d1 100644 --- a/include/onnxruntime/core/framework/float16.h +++ b/include/onnxruntime/core/framework/float16.h @@ -261,19 +261,19 @@ struct BFloat16 : onnxruntime_float16::BFloat16Impl { // initializers with MLFloat16 and BFloat16 from unsigned short // E.g 10_f16 or 10_b16 #if !defined(__CUDACC__) && !defined(__HIPCC__) -inline MLFloat16 operator"" _f16(unsigned long long int v) noexcept { +inline MLFloat16 operator""_f16(unsigned long long int v) noexcept { return MLFloat16::FromBits(narrow(v)); } -inline MLFloat16 operator"" _fp16(long double v) noexcept { +inline MLFloat16 operator""_fp16(long double v) noexcept { return MLFloat16(static_cast(v)); } -inline BFloat16 operator"" _b16(unsigned long long int v) noexcept { +inline BFloat16 operator""_b16(unsigned long long int v) noexcept { return BFloat16::FromBits((narrow(v))); } -inline BFloat16 operator"" _bfp16(long double v) noexcept { +inline BFloat16 operator""_bfp16(long double v) noexcept { return BFloat16(static_cast(v)); } #endif diff --git a/include/onnxruntime/core/framework/float8.h b/include/onnxruntime/core/framework/float8.h index 5d92ee86af864..9e94cc297f782 100644 --- a/include/onnxruntime/core/framework/float8.h +++ b/include/onnxruntime/core/framework/float8.h @@ -165,11 +165,11 @@ inline ORT_HOST_DEVICE bool operator<(const Float8E4M3FN& left, const Float8E4M3 // initializers with MLFloat8E4M3FN and Float8E4M3FN from unsigned char #if !defined(__CUDACC__) && !defined(__HIPCC__) -inline Float8E4M3FN operator"" _f8e4m3fn(unsigned long long int v) { +inline Float8E4M3FN operator""_f8e4m3fn(unsigned long long int v) { return Float8E4M3FN(narrow(v), Float8E4M3FN::FromBits()); } -inline Float8E4M3FN operator"" _f8e4m3fnp8(long double v) { +inline Float8E4M3FN operator""_f8e4m3fnp8(long double v) { return Float8E4M3FN(static_cast(v), true); } @@ -323,11 +323,11 @@ inline ORT_HOST_DEVICE bool operator<(const Float8E4M3FNUZ& left, const Float8E4 // initializers with MLFloat8E4M3FN and Float8E4M3FN from unsigned char #if !defined(__CUDACC__) && !defined(__HIPCC__) -inline Float8E4M3FNUZ operator"" _f8e4m3p8fnuz(unsigned long long int v) { +inline Float8E4M3FNUZ operator""_f8e4m3p8fnuz(unsigned long long int v) { return Float8E4M3FNUZ(narrow(v), Float8E4M3FNUZ::FromBits()); } -inline Float8E4M3FNUZ operator"" _f8e4m3fnuzp8(long double v) { +inline Float8E4M3FNUZ operator""_f8e4m3fnuzp8(long double v) { return Float8E4M3FNUZ(static_cast(v), true); } @@ -493,11 +493,11 @@ inline ORT_HOST_DEVICE bool operator<(const Float8E5M2& left, const Float8E5M2& // initializers with MLFloat8E5M2 and Float8E5M2 from unsigned char #if !defined(__CUDACC__) && !defined(__HIPCC__) -inline Float8E5M2 operator"" _f8e5m2fn(unsigned long long int v) { +inline Float8E5M2 operator""_f8e5m2fn(unsigned long long int v) { return Float8E5M2(narrow(v), Float8E5M2::FromBits()); } -inline Float8E5M2 operator"" _f8e5m2fnp8(long double v) { +inline Float8E5M2 operator""_f8e5m2fnp8(long double v) { return Float8E5M2(static_cast(v), true); } @@ -642,11 +642,11 @@ inline ORT_HOST_DEVICE bool operator<(const Float8E5M2FNUZ& left, const Float8E5 // initializers with MLFloat8E5M2 and Float8E5M2 from unsigned char #if !defined(__CUDACC__) && !defined(__HIPCC__) -inline Float8E5M2FNUZ operator"" _f8e5m2fnuz(unsigned long long 
int v) { +inline Float8E5M2FNUZ operator""_f8e5m2fnuz(unsigned long long int v) { return Float8E5M2FNUZ(narrow(v), Float8E5M2FNUZ::FromBits()); } -inline Float8E5M2FNUZ operator"" _f8e5m2fnuzp8(long double v) { +inline Float8E5M2FNUZ operator""_f8e5m2fnuzp8(long double v) { return Float8E5M2FNUZ(static_cast(v), true); } diff --git a/include/onnxruntime/core/framework/ortdevice.h b/include/onnxruntime/core/framework/ortdevice.h index 6f658ab65be20..adade482f6a17 100644 --- a/include/onnxruntime/core/framework/ortdevice.h +++ b/include/onnxruntime/core/framework/ortdevice.h @@ -25,6 +25,7 @@ struct OrtDevice { static const MemoryType CUDA_PINNED = 1; static const MemoryType HIP_PINNED = 2; static const MemoryType CANN_PINNED = 3; + static const MemoryType QNN_HTP_SHARED = 4; }; constexpr OrtDevice(DeviceType device_type_, MemoryType memory_type_, DeviceId device_id_) diff --git a/include/onnxruntime/core/framework/ortmemoryinfo.h b/include/onnxruntime/core/framework/ortmemoryinfo.h index 7af5554e25c0b..82f581e994904 100644 --- a/include/onnxruntime/core/framework/ortmemoryinfo.h +++ b/include/onnxruntime/core/framework/ortmemoryinfo.h @@ -6,6 +6,8 @@ #include #include "core/common/hash_combine.h" +#include "core/framework/ortdevice.h" +#include "core/session/onnxruntime_c_api.h" // for OrtMemType, OrtAllocatorType struct OrtMemoryInfo { OrtMemoryInfo() = default; // to allow default construction of Tensor diff --git a/include/onnxruntime/core/graph/node_arg.h b/include/onnxruntime/core/graph/node_arg.h index 921bff59fb6d4..0ddf1a2b9d3de 100644 --- a/include/onnxruntime/core/graph/node_arg.h +++ b/include/onnxruntime/core/graph/node_arg.h @@ -3,7 +3,7 @@ #pragma once -#include "onnx/onnx_pb.h" +#include "core/graph/onnx_protobuf.h" #include "core/graph/basic_types.h" #include "core/common/status.h" diff --git a/onnxruntime/core/graph/onnx_protobuf.h b/include/onnxruntime/core/graph/onnx_protobuf.h similarity index 100% rename from onnxruntime/core/graph/onnx_protobuf.h rename to include/onnxruntime/core/graph/onnx_protobuf.h diff --git a/include/onnxruntime/core/platform/EigenNonBlockingThreadPool.h b/include/onnxruntime/core/platform/EigenNonBlockingThreadPool.h index a7c63c507d1ba..26fc440f7bfc5 100644 --- a/include/onnxruntime/core/platform/EigenNonBlockingThreadPool.h +++ b/include/onnxruntime/core/platform/EigenNonBlockingThreadPool.h @@ -218,18 +218,18 @@ class ThreadPoolProfiler { WAIT_REVOKE, MAX_EVENT }; - ThreadPoolProfiler(int, const CHAR_TYPE*) {}; + ThreadPoolProfiler(int, const CHAR_TYPE*) {} ~ThreadPoolProfiler() = default; ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(ThreadPoolProfiler); - void Start() {}; + void Start() {} std::string Stop() { return "not available for minimal build"; } - void LogStart() {}; - void LogEnd(ThreadPoolEvent){}; - void LogEndAndStart(ThreadPoolEvent){}; - void LogStartAndCoreAndBlock(std::ptrdiff_t){}; - void LogCoreAndBlock(std::ptrdiff_t){}; - void LogThreadId(int) {}; - void LogRun(int) {}; + void LogStart() {} + void LogEnd(ThreadPoolEvent) {} + void LogEndAndStart(ThreadPoolEvent) {} + void LogStartAndCoreAndBlock(std::ptrdiff_t) {} + void LogCoreAndBlock(std::ptrdiff_t) {} + void LogThreadId(int) {} + void LogRun(int) {} std::string DumpChildThreadStat() { return {}; } }; #else diff --git a/include/onnxruntime/core/session/onnxruntime_c_api.h b/include/onnxruntime/core/session/onnxruntime_c_api.h index 3d995e21e0017..6fef2448be0fe 100644 --- a/include/onnxruntime/core/session/onnxruntime_c_api.h +++ 
b/include/onnxruntime/core/session/onnxruntime_c_api.h @@ -3670,6 +3670,10 @@ struct OrtApi { * "enable_htp_spill_fill_buffer": Enable HTP spill fill buffer setting. The flag is used while generating context binary. * - "0": Default. Disabled. * - "1": Enabled. + * "enable_htp_shared_memory_allocator": Enable the QNN HTP shared memory allocator. Requires libcdsprpc.so/dll to + * be available. + * - "0": Default. Disabled. + * - "1": Enabled. * * SNPE supported keys: * "runtime": SNPE runtime engine, options: "CPU", "CPU_FLOAT32", "GPU", "GPU_FLOAT32_16_HYBRID", "GPU_FLOAT16", diff --git a/include/onnxruntime/core/session/onnxruntime_cxx_api.h b/include/onnxruntime/core/session/onnxruntime_cxx_api.h index f3e9758766d00..123ef98901003 100644 --- a/include/onnxruntime/core/session/onnxruntime_cxx_api.h +++ b/include/onnxruntime/core/session/onnxruntime_cxx_api.h @@ -2130,10 +2130,10 @@ struct KernelContext { explicit KernelContext(OrtKernelContext* context); size_t GetInputCount() const; size_t GetOutputCount() const; - // If input is optional and is not present, the method returns en empty ConstValue + // If input is optional and is not present, the method returns an empty ConstValue // which can be compared to nullptr. ConstValue GetInput(size_t index) const; - // If outout is optional and is not present, the method returns en empty UnownedValue + // If output is optional and is not present, the method returns an empty UnownedValue // which can be compared to nullptr. UnownedValue GetOutput(size_t index, const int64_t* dim_values, size_t dim_count) const; UnownedValue GetOutput(size_t index, const std::vector& dims) const; diff --git a/java/src/main/java/ai/onnxruntime/OnnxRuntime.java b/java/src/main/java/ai/onnxruntime/OnnxRuntime.java index b80debdde47c4..c28c79f1e723e 100644 --- a/java/src/main/java/ai/onnxruntime/OnnxRuntime.java +++ b/java/src/main/java/ai/onnxruntime/OnnxRuntime.java @@ -76,6 +76,9 @@ final class OnnxRuntime { /** The short name of the ONNX runtime TensorRT provider library */ static final String ONNXRUNTIME_LIBRARY_TENSORRT_NAME = "onnxruntime_providers_tensorrt"; + /** The short name of the ONNX runtime QNN provider library */ + static final String ONNXRUNTIME_LIBRARY_QNN_NAME = "onnxruntime_providers_qnn"; + /** The OS & CPU architecture string */ private static final String OS_ARCH_STR = initOsArch(); @@ -159,8 +162,11 @@ static synchronized void init() throws IOException { // the ONNX Runtime native library will load it extractProviderLibrary(ONNXRUNTIME_LIBRARY_SHARED_NAME); - load(ONNXRUNTIME_LIBRARY_NAME); + if (!isAndroid()) { + load(ONNXRUNTIME_LIBRARY_NAME); + } load(ONNXRUNTIME_JNI_LIBRARY_NAME); + ortApiHandle = initialiseAPIBase(ORT_API_VERSION_14); if (ortApiHandle == 0L) { throw new IllegalStateException( @@ -252,6 +258,16 @@ static boolean extractTensorRT() { return extractProviderLibrary(ONNXRUNTIME_LIBRARY_TENSORRT_NAME); } + /** + * Extracts the QNN provider library from the classpath resources if present, or checks to see if + * the QNN provider library is in the directory specified by {@link #ONNXRUNTIME_NATIVE_PATH}. + * + * @return True if the QNN provider library is ready for loading, false otherwise. + */ + static boolean extractQNN() { + return extractProviderLibrary(ONNXRUNTIME_LIBRARY_QNN_NAME); + } + /** * Extracts a shared provider library from the classpath resources if present, or checks to see if * that library is in the directory specified by {@link #ONNXRUNTIME_NATIVE_PATH}. 
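Taken together, the new "enable_htp_shared_memory_allocator" key documented in the C API header above and the extractQNN()/addQnn() additions in the Java binding imply the following end-to-end usage from Java. This is a minimal sketch, not part of the patch itself: the model path is a placeholder, and the "backend_path" key is assumed to be the usual QNN EP option for selecting the HTP backend.

import java.util.HashMap;
import java.util.Map;
import ai.onnxruntime.OrtEnvironment;
import ai.onnxruntime.OrtException;
import ai.onnxruntime.OrtSession;

public class QnnSharedMemoryExample {
  public static void main(String[] args) throws OrtException {
    OrtEnvironment env = OrtEnvironment.getEnvironment();
    try (OrtSession.SessionOptions options = new OrtSession.SessionOptions()) {
      Map<String, String> qnnOptions = new HashMap<>();
      // Assumed standard QNN EP option for the HTP backend; not added by this patch.
      qnnOptions.put("backend_path", "libQnnHtp.so");
      // New option documented in this change; requires libcdsprpc.so/dll to be available.
      qnnOptions.put("enable_htp_shared_memory_allocator", "1");
      // addQnn() now calls OnnxRuntime.extractQNN() before registering the provider.
      options.addQnn(qnnOptions);
      try (OrtSession session = env.createSession("model.onnx", options)) {
        // run inference as usual
      }
    }
  }
}

When the option is enabled, the QNN_HTP_SHARED memory type and QnnHtpShared allocator name introduced earlier in this change suggest that buffers are placed in memory visible to both the CPU and the HTP.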
@@ -260,7 +276,7 @@ static boolean extractTensorRT() { * @return True if the library is ready for loading by ORT's native code, false otherwise. */ static synchronized boolean extractProviderLibrary(String libraryName) { - // Android does not need to extract library and it has no shared provider library + // Android does not need to extract provider libraries. if (isAndroid()) { return false; } @@ -312,7 +328,7 @@ static boolean isAndroid() { private static void load(String library) throws IOException { // On Android, we simply use System.loadLibrary if (isAndroid()) { - System.loadLibrary("onnxruntime4j_jni"); + System.loadLibrary(library); return; } diff --git a/java/src/main/java/ai/onnxruntime/OrtSession.java b/java/src/main/java/ai/onnxruntime/OrtSession.java index 32dc9d9f84aaa..bd988e2bb7468 100644 --- a/java/src/main/java/ai/onnxruntime/OrtSession.java +++ b/java/src/main/java/ai/onnxruntime/OrtSession.java @@ -1320,6 +1320,10 @@ public void addXnnpack(Map providerOptions) throws OrtException */ public void addQnn(Map providerOptions) throws OrtException { String qnnProviderName = "QNN"; + + // QNN can either be built as a shared or static library. extractQNN() will extract the + // (lib)onnxruntime_providers_qnn(.so/.dll) from classpath resources if present. + OnnxRuntime.extractQNN(); addExecutionProvider(qnnProviderName, providerOptions); } diff --git a/js/package-lock.json b/js/package-lock.json index 92eb0b422c76b..e915c001655d1 100644 --- a/js/package-lock.json +++ b/js/package-lock.json @@ -29,7 +29,7 @@ "mocha": "^11.0.1", "npmlog": "^7.0.1", "prettier": "^3.3.3", - "terser": "^5.31.0", + "terser": "^5.37.0", "typescript": "^5.2.2" } }, @@ -4783,9 +4783,9 @@ } }, "node_modules/terser": { - "version": "5.31.0", - "resolved": "https://registry.npmjs.org/terser/-/terser-5.31.0.tgz", - "integrity": "sha512-Q1JFAoUKE5IMfI4Z/lkE/E6+SwgzO+x4tq4v1AyBLRj8VSYvRO6A/rQrPg1yud4g0En9EKI1TvFRF2tQFcoUkg==", + "version": "5.37.0", + "resolved": "https://registry.npmjs.org/terser/-/terser-5.37.0.tgz", + "integrity": "sha512-B8wRRkmre4ERucLM/uXx4MOV5cbnOlVAqUst+1+iLKPI0dOgFO28f84ptoQt9HEI537PMzfYa/d+GEPKTRXmYA==", "dev": true, "dependencies": { "@jridgewell/source-map": "^0.3.3", @@ -8518,9 +8518,9 @@ "dev": true }, "terser": { - "version": "5.31.0", - "resolved": "https://registry.npmjs.org/terser/-/terser-5.31.0.tgz", - "integrity": "sha512-Q1JFAoUKE5IMfI4Z/lkE/E6+SwgzO+x4tq4v1AyBLRj8VSYvRO6A/rQrPg1yud4g0En9EKI1TvFRF2tQFcoUkg==", + "version": "5.37.0", + "resolved": "https://registry.npmjs.org/terser/-/terser-5.37.0.tgz", + "integrity": "sha512-B8wRRkmre4ERucLM/uXx4MOV5cbnOlVAqUst+1+iLKPI0dOgFO28f84ptoQt9HEI537PMzfYa/d+GEPKTRXmYA==", "dev": true, "requires": { "@jridgewell/source-map": "^0.3.3", diff --git a/js/package.json b/js/package.json index 3b7a3ec6e0a33..15cbe12e2742f 100644 --- a/js/package.json +++ b/js/package.json @@ -23,7 +23,7 @@ "mocha": "^11.0.1", "npmlog": "^7.0.1", "prettier": "^3.3.3", - "terser": "^5.31.0", + "terser": "^5.37.0", "typescript": "^5.2.2" }, "scripts": { diff --git a/js/react_native/android/build.gradle b/js/react_native/android/build.gradle index 521866ff0f3e2..2f5b5adc7a1fa 100644 --- a/js/react_native/android/build.gradle +++ b/js/react_native/android/build.gradle @@ -70,6 +70,8 @@ def REACT_NATIVE_VERSION = ['node', '--print', "JSON.parse(require('fs').readFil def REACT_NATIVE_MINOR_VERSION = REACT_NATIVE_VERSION.split("\\.")[1].toInteger() android { +// This is needed by the new AndroidManifestNew.xml + namespace "ai.onnxruntime.reactnative" 
compileSdkVersion getExtOrIntegerDefault('compileSdkVersion') buildToolsVersion getExtOrDefault('buildToolsVersion') defaultConfig { @@ -110,6 +112,8 @@ android { } packagingOptions { + pickFirst '**/libc++_shared.so' + pickFirst '**/libfbjni.so' doNotStrip resolveBuildType() == 'debug' ? "**/**/*.so" : '' excludes = [ "META-INF", @@ -127,13 +131,15 @@ android { disable 'GradleCompatible' } compileOptions { - sourceCompatibility JavaVersion.VERSION_1_8 - targetCompatibility JavaVersion.VERSION_1_8 + sourceCompatibility JavaVersion.VERSION_17 + targetCompatibility JavaVersion.VERSION_17 } sourceSets { main { java.srcDirs = ['src/main/java/'] +// A tricky situation where iOS still uses the AndroidManifest.xml file, but Android uses AndroidManifestNew.xml + manifest.srcFile "src/main/AndroidManifestNew.xml" if (ortExtensionsEnabled) { java.exclude '**/OnnxruntimeExtensionsDisabled.java' } else { @@ -218,7 +224,8 @@ repositories { } dependencies { - api "com.facebook.react:react-native:" + REACT_NATIVE_VERSION + //noinspection GradleDynamicVersion + implementation "com.facebook.react:react-android:" + REACT_NATIVE_VERSION api "org.mockito:mockito-core:2.28.2" androidTestImplementation "androidx.test:runner:1.5.2" @@ -233,4 +240,4 @@ dependencies { if (ortExtensionsEnabled) { implementation "com.microsoft.onnxruntime:onnxruntime-extensions-android:latest.integration@aar" } -} +} \ No newline at end of file diff --git a/js/react_native/android/gradle.properties b/js/react_native/android/gradle.properties index 8fe6e40d76911..3461ce4919d38 100644 --- a/js/react_native/android/gradle.properties +++ b/js/react_native/android/gradle.properties @@ -4,7 +4,7 @@ # Specifies the JVM arguments used for the daemon process. # The setting is particularly useful for tweaking memory settings. # Default value: -Xmx1024m -XX:MaxPermSize=256m -org.gradle.jvmargs=-Xmx4096m -XX:+HeapDumpOnOutOfMemoryError -Dfile.encoding=UTF-8 +org.gradle.jvmargs=-Xmx4096m -XX:+HeapDumpOnOutOfMemoryError -Dfile.encoding=UTF-8 -Djavax.xml.accessExternalSchema=all -Djavax.xml.accessExternalDTD=all # # When configured, Gradle will run in incubating parallel mode. # This option should only be used with decoupled projects. 
More details, visit @@ -13,7 +13,11 @@ org.gradle.jvmargs=-Xmx4096m -XX:+HeapDumpOnOutOfMemoryError -Dfile.encoding=UTF-8 #Tue Jan 26 17:36:02 PST 2021 android.enableJetifier=true android.useAndroidX=true -OnnxruntimeModule_buildToolsVersion=29.0.2 -OnnxruntimeModule_compileSdkVersion=31 -OnnxruntimeModule_minSdkVersion=21 -OnnxruntimeModule_targetSdkVersion=31 +OnnxruntimeModule_buildToolsVersion=33.0.0 +OnnxruntimeModule_compileSdkVersion=34 +OnnxruntimeModule_minSdkVersion=24 +OnnxruntimeModule_targetSdkVersion=34 + +systemProp.javax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl +systemProp.javax.xml.transform.TransformerFactory=com.sun.org.apache.xalan.internal.xsltc.trax.TransformerFactoryImpl +systemProp.javax.xml.parsers.DocumentBuilderFactory=com.sun.org.apache.xerces.internal.jaxp.DocumentBuilderFactoryImpl diff --git a/js/react_native/android/src/main/AndroidManifestNew.xml b/js/react_native/android/src/main/AndroidManifestNew.xml new file mode 100644 index 0000000000000..a30cbbdd6faf7 --- /dev/null +++ b/js/react_native/android/src/main/AndroidManifestNew.xml @@ -0,0 +1,2 @@ +<manifest xmlns:android="http://schemas.android.com/apk/res/android"> +</manifest> diff --git a/js/react_native/e2e/android/app/build.gradle b/js/react_native/e2e/android/app/build.gradle index 526259e3f8d8f..68eaacc1908c3 100644 --- a/js/react_native/e2e/android/app/build.gradle +++ b/js/react_native/e2e/android/app/build.gradle @@ -1,132 +1,105 @@ apply plugin: "com.android.application" +apply plugin: "com.facebook.react" import com.android.build.OutputFile /** - * The react.gradle file registers a task for each build variant (e.g. bundleDebugJsAndAssets - * and bundleReleaseJsAndAssets). - * These basically call `react-native bundle` with the correct arguments during the Android build - * cycle. By default, bundleDebugJsAndAssets is skipped, as in debug/dev mode we prefer to load the - * bundle directly from the development server. Below you can see all the possible configurations - * and their defaults. If you decide to add a configuration block, make sure to add it before the - * `apply from: "../../node_modules/react-native/react.gradle"` line. - * - * project.ext.react = [ - * // the name of the generated asset file containing your JS bundle - * bundleAssetName: "index.android.bundle", - * - * // the entry file for bundle generation - * entryFile: "index.android.js", - * - * // https://reactnative.dev/docs/performance#enable-the-ram-format - * bundleCommand: "ram-bundle", - * - * // whether to bundle JS and assets in debug mode - * bundleInDebug: false, - * - * // whether to bundle JS and assets in release mode - * bundleInRelease: true, - * - * // whether to bundle JS and assets in another build variant (if configured). - * // See http://tools.android.com/tech-docs/new-build-system/user-guide#TOC-Build-Variants - * // The configuration property can be in the following formats - * // 'bundleIn${productFlavor}${buildType}' - * // 'bundleIn${buildType}' - * // bundleInFreeDebug: true, - * // bundleInPaidRelease: true, - * // bundleInBeta: true, - * - * // whether to disable dev mode in custom build variants (by default only disabled in release) - * // for OnnxruntimeModuleExample: to disable dev mode in the staging build type (if configured) - * devDisabledInStaging: true, - * // The configuration property can be in the following formats - * // 'devDisabledIn${productFlavor}${buildType}' - * // 'devDisabledIn${buildType}' - * - * // the root of your project, i.e. 
where "package.json" lives - * root: "../../", - * - * // where to put the JS bundle asset in debug mode - * jsBundleDirDebug: "$buildDir/intermediates/assets/debug", - * - * // where to put the JS bundle asset in release mode - * jsBundleDirRelease: "$buildDir/intermediates/assets/release", - * - * // where to put drawable resources / React Native assets, e.g. the ones you use via - * // require('./image.png')), in debug mode - * resourcesDirDebug: "$buildDir/intermediates/res/merged/debug", - * - * // where to put drawable resources / React Native assets, e.g. the ones you use via - * // require('./image.png')), in release mode - * resourcesDirRelease: "$buildDir/intermediates/res/merged/release", - * - * // by default the gradle tasks are skipped if none of the JS files or assets change; this means - * // that we don't look at files in android/ or ios/ to determine whether the tasks are up to - * // date; if you have any other folders that you want to ignore for performance reasons (gradle - * // indexes the entire tree), add them here. Alternatively, if you have JS files in android/ - * // for OnnxruntimeModuleExample, you might want to remove it from here. - * inputExcludes: ["android/**", "ios/**"], - * - * // override which node gets called and with what additional arguments - * nodeExecutableAndArgs: ["node"], - * - * // supply additional arguments to the packager - * extraPackagerArgs: [] - * ] + * This is the configuration block to customize your React Native Android app. + * By default you don't need to apply any configuration, just uncomment the lines you need. */ - -project.ext.react = [ - enableHermes: false, // clean and rebuild if changing - entryFile: "index.tsx", -] - -apply from: "../../node_modules/react-native/react.gradle" +react { + /* Folders */ + // The root of your project, i.e. where "package.json" lives. Default is '..' + // root = file("../") + // The folder where the react-native NPM package is. Default is ../node_modules/react-native + // reactNativeDir = file("../node_modules/react-native") + // The folder where the react-native Codegen package is. Default is ../node_modules/react-native-codegen + // codegenDir = file("../node_modules/react-native-codegen") + // The cli.js file which is the React Native CLI entrypoint. Default is ../node_modules/react-native/cli.js + // cliFile = file("../node_modules/react-native/cli.js") + + /* Variants */ + // The list of variants that are debuggable. For those we're going to + // skip the bundling of the JS bundle and the assets. By default it is just 'debug'. + // If you add flavors like lite, prod, etc. you'll have to list your debuggableVariants. + // debuggableVariants = ["liteDebug", "prodDebug"] + + /* Bundling */ + // A list containing the node command and its flags. Default is just 'node'. + // nodeExecutableAndArgs = ["node"] + // + // The command to run when bundling. By default it is 'bundle' + // bundleCommand = "ram-bundle" + // + // The path to the CLI configuration file. Default is empty. + // bundleConfig = file(../rn-cli.config.js) + // + // The name of the generated asset file containing your JS bundle + // bundleAssetName = "MyApplication.android.bundle" + // + // The entry file for bundle generation. Default is 'index.android.js' or 'index.js' + entryFile = file("${rootProject.projectDir}/../index.tsx") + // + // A list of extra flags to pass to the 'bundle' commands. 
+ // See https://github.com/react-native-community/cli/blob/main/docs/commands.md#bundle + // extraPackagerArgs = [] + + /* Hermes Commands */ + // The hermes compiler command to run. By default it is 'hermesc' + // hermesCommand = "$rootDir/my-custom-hermesc/bin/hermesc" + // + // The list of flags to pass to the Hermes compiler. By default it is "-O", "-output-source-map" + // hermesFlags = ["-O", "-output-source-map"] +} /** - * Set this to true to create two separate APKs instead of one: - * - An APK that only works on ARM devices - * - An APK that only works on x86 devices - * The advantage is the size of the APK is reduced by about 4MB. - * Upload all the APKs to the Play Store and people will download - * the correct one based on the CPU architecture of their device. + * Set this to true to create four separate APKs instead of one, + * one for each native architecture. This is useful if you don't + * use App Bundles (https://developer.android.com/guide/app-bundle/) + * and want to have separate APKs to upload to the Play Store. */ def enableSeparateBuildPerCPUArchitecture = false /** - * Run Proguard to shrink the Java bytecode in release builds. + * Set this to true to run Proguard on Release builds to minify the Java bytecode. */ def enableProguardInReleaseBuilds = false /** - * The preferred build flavor of JavaScriptCore. + * The preferred build flavor of JavaScriptCore (JSC). * - * For OnnxruntimeModuleExample, to use the international variant, you can use: + * For example, to use the international variant, you can use: * `def jscFlavor = 'org.webkit:android-jsc-intl:+'` * * The international variant includes ICU i18n library and necessary data * allowing to use e.g. `Date.toLocaleString` and `String.localeCompare` that - * give correct results when using with locales other than en-US. Note that + * give correct results when using with locales other than en-US. Note that * this variant is about 6MiB larger per architecture than default. */ def jscFlavor = 'org.webkit:android-jsc:+' /** - * Whether to enable the Hermes VM. - * - * This should be set on project.ext.react and mirrored here. If it is not set - * on project.ext.react, JavaScript will not be compiled to Hermes Bytecode - * and the benefits of using Hermes will therefore be sharply reduced. + * Private function to get the list of Native Architectures you want to build. + * This reads the value from reactNativeArchitectures in your gradle.properties + * file and works together with the --active-arch-only flag of react-native run-android. */ -def enableHermes = project.ext.react.get("enableHermes", false); +def reactNativeArchitectures() { + def value = project.getProperties().get("reactNativeArchitectures") + return value ? 
value.split(",") : ["armeabi-v7a", "x86", "x86_64", "arm64-v8a"] +} android { - compileSdkVersion rootProject.ext.compileSdkVersion + compileSdkVersion rootProject.ext.compileSdkVersion + namespace "com.example.reactnativeonnxruntimemodule" compileOptions { - sourceCompatibility JavaVersion.VERSION_1_8 - targetCompatibility JavaVersion.VERSION_1_8 + sourceCompatibility JavaVersion.VERSION_17 + targetCompatibility JavaVersion.VERSION_17 } - + packagingOptions { + pickFirst '**/libc++_shared.so' + pickFirst '**/libfbjni.so' + } defaultConfig { applicationId "com.example.reactnativeonnxruntimemodule" minSdkVersion rootProject.ext.minSdkVersion @@ -185,12 +158,12 @@ repositories { } dependencies { - androidTestImplementation('com.wix:detox:20.7.0') + androidTestImplementation('com.wix:detox:+') implementation 'androidx.appcompat:appcompat:1.1.0' implementation fileTree(dir: "libs", include: ["*.jar"]) - //noinspection GradleDynamicVersion - implementation "com.facebook.react:react-native:+" // From node_modules + // The version of react-native is set by the React Native Gradle Plugin + implementation("com.facebook.react:react-android") implementation "androidx.swiperefreshlayout:swiperefreshlayout:1.0.0" implementation 'androidx.test.ext:junit:1.1.5' @@ -205,10 +178,8 @@ dependencies { exclude group:'com.facebook.flipper' } - if (enableHermes) { - def hermesPath = "../../node_modules/hermes-engine/android/"; - debugImplementation files(hermesPath + "hermes-debug.aar") - releaseImplementation files(hermesPath + "hermes-release.aar") + if (hermesEnabled.toBoolean()) { + implementation("com.facebook.react:hermes-android") } else { implementation jscFlavor } diff --git a/js/react_native/e2e/android/app/src/debug/java/com/example/reactnativeonnxruntimemodule/ReactNativeFlipper.java b/js/react_native/e2e/android/app/src/debug/java/com/example/reactnativeonnxruntimemodule/ReactNativeFlipper.java index 5624fffa7f808..3cacb2d5d4bf0 100644 --- a/js/react_native/e2e/android/app/src/debug/java/com/example/reactnativeonnxruntimemodule/ReactNativeFlipper.java +++ b/js/react_native/e2e/android/app/src/debug/java/com/example/reactnativeonnxruntimemodule/ReactNativeFlipper.java @@ -1,10 +1,10 @@ /** - * Copyright (c) Facebook, Inc. and its affiliates. + * Copyright (c) Meta Platforms, Inc. and affiliates. * *

<p>This source code is licensed under the MIT license found in the LICENSE file in the root * directory of this source tree. */ -package com.example.reactnativeonnxruntimemodule; +package com.reactnativeonnxruntimemodule; import android.content.Context; import com.facebook.flipper.android.AndroidFlipperClient; @@ -17,50 +17,59 @@ import com.facebook.flipper.plugins.inspector.InspectorFlipperPlugin; import com.facebook.flipper.plugins.network.FlipperOkhttpInterceptor; import com.facebook.flipper.plugins.network.NetworkFlipperPlugin; -import com.facebook.flipper.plugins.react.ReactFlipperPlugin; import com.facebook.flipper.plugins.sharedpreferences.SharedPreferencesFlipperPlugin; +import com.facebook.react.ReactInstanceEventListener; import com.facebook.react.ReactInstanceManager; import com.facebook.react.bridge.ReactContext; import com.facebook.react.modules.network.NetworkingModule; import okhttp3.OkHttpClient; +/** + * Class responsible for loading Flipper inside your React Native application. This is the debug + * flavor of it. Here you can add your own plugins and customize the Flipper setup. + */ public class ReactNativeFlipper { public static void initializeFlipper(Context context, ReactInstanceManager reactInstanceManager) { if (FlipperUtils.shouldEnableFlipper(context)) { final FlipperClient client = AndroidFlipperClient.getInstance(context); + client.addPlugin(new InspectorFlipperPlugin(context, DescriptorMapping.withDefaults())); - client.addPlugin(new ReactFlipperPlugin()); client.addPlugin(new DatabasesFlipperPlugin(context)); client.addPlugin(new SharedPreferencesFlipperPlugin(context)); client.addPlugin(CrashReporterPlugin.getInstance()); + NetworkFlipperPlugin networkFlipperPlugin = new NetworkFlipperPlugin(); - NetworkingModule.setCustomClientBuilder(new NetworkingModule.CustomClientBuilder() { - @Override - public void apply(OkHttpClient.Builder builder) { - builder.addNetworkInterceptor(new FlipperOkhttpInterceptor(networkFlipperPlugin)); - } - }); + NetworkingModule.setCustomClientBuilder( + new NetworkingModule.CustomClientBuilder() { + @Override + public void apply(OkHttpClient.Builder builder) { + builder.addNetworkInterceptor(new FlipperOkhttpInterceptor(networkFlipperPlugin)); + } + }); client.addPlugin(networkFlipperPlugin); client.start(); + // Fresco Plugin needs to ensure that ImagePipelineFactory is initialized // Hence we run it after all native modules have been initialized ReactContext reactContext = reactInstanceManager.getCurrentReactContext(); if (reactContext == null) { - reactInstanceManager.addReactInstanceEventListener(new ReactInstanceManager.ReactInstanceEventListener() { - @Override - public void onReactContextInitialized(ReactContext reactContext) { - reactInstanceManager.removeReactInstanceEventListener(this); - reactContext.runOnNativeModulesQueueThread(new Runnable() { + reactInstanceManager.addReactInstanceEventListener( + new ReactInstanceEventListener() { @Override - public void run() { - client.addPlugin(new FrescoFlipperPlugin()); + public void onReactContextInitialized(ReactContext reactContext) { + reactInstanceManager.removeReactInstanceEventListener(this); + reactContext.runOnNativeModulesQueueThread( + new Runnable() { + @Override + public void run() { + client.addPlugin(new FrescoFlipperPlugin()); + } + }); } }); - } - }); } else { client.addPlugin(new FrescoFlipperPlugin()); } } } -} +} \ No newline at end of file diff --git a/js/react_native/e2e/android/app/src/main/AndroidManifest.xml 
b/js/react_native/e2e/android/app/src/main/AndroidManifest.xml index 24e685b6caf0b..d219c7c18f3c2 100644 --- a/js/react_native/e2e/android/app/src/main/AndroidManifest.xml +++ b/js/react_native/e2e/android/app/src/main/AndroidManifest.xml @@ -1,5 +1,4 @@ -<manifest xmlns:android="http://schemas.android.com/apk/res/android" - package="com.example.reactnativeonnxruntimemodule"> +<manifest xmlns:android="http://schemas.android.com/apk/res/android"> diff --git a/js/react_native/e2e/android/build.gradle b/js/react_native/e2e/android/build.gradle index 5932dfc5695d6..1178791f48982 100644 @@ -2,10 +2,10 @@ buildscript { ext { - buildToolsVersion = "29.0.2" - minSdkVersion = 21 - compileSdkVersion = 31 - targetSdkVersion = 31 + buildToolsVersion = "33.0.0" + minSdkVersion = 24 + compileSdkVersion = 34 + targetSdkVersion = 34 kotlinVersion = "1.5.30" } repositories { @@ -13,8 +13,10 @@ buildscript { mavenCentral() } dependencies { - classpath('com.android.tools.build:gradle:7.1.1') + classpath('com.android.tools.build:gradle:7.2.1') classpath("org.jetbrains.kotlin:kotlin-gradle-plugin:$kotlinVersion") + classpath("com.facebook.react:react-native-gradle-plugin") + // NOTE: Do not place your application dependencies here; they belong + // in the individual module build.gradle files } diff --git a/js/react_native/e2e/android/gradle.properties b/js/react_native/e2e/android/gradle.properties index 5c4f82a8fc9e8..a2541138f1ba9 100644 @@ -19,5 +19,9 @@ android.useAndroidX=true android.enableJetifier=true -FLIPPER_VERSION=0.54.0 +FLIPPER_VERSION=0.125.0 org.gradle.jvmargs=-Xmx4096M + +# Use this property to enable or disable the Hermes JS engine. +# If set to false, you will be using JSC instead. +hermesEnabled=false diff --git a/js/react_native/e2e/android/gradle/wrapper/gradle-wrapper.properties b/js/react_native/e2e/android/gradle/wrapper/gradle-wrapper.properties index 59250647c4090..f7189a776c1a0 100644 @@ -1,6 +1,6 @@ distributionBase=GRADLE_USER_HOME distributionPath=wrapper/dists -distributionSha256Sum=b586e04868a22fd817c8971330fec37e298f3242eb85c374181b12d637f80302 -distributionUrl=https\://services.gradle.org/distributions/gradle-7.3.3-bin.zip +distributionSha256Sum=db9c8211ed63f61f60292c69e80d89196f9eb36665e369e7f00ac4cc841c2219 +distributionUrl=https\://services.gradle.org/distributions/gradle-7.5.1-all.zip zipStoreBase=GRADLE_USER_HOME zipStorePath=wrapper/dists diff --git a/js/react_native/e2e/android/settings.gradle b/js/react_native/e2e/android/settings.gradle index fd02678d9bb4b..b12a36f91dcae 100644 @@ -1,3 +1,4 @@ rootProject.name = 'OnnxruntimeModuleExample' apply from: file("../node_modules/@react-native-community/cli-platform-android/native_modules.gradle"); applyNativeModulesSettingsGradle(settings) include ':app' +includeBuild('../node_modules/react-native-gradle-plugin') \ No newline at end of file diff --git a/js/react_native/e2e/ios/MNISTDataHandler.mm b/js/react_native/e2e/ios/MNISTDataHandler.mm index 54a4b629865d0..1a79b66ca5d2f 100644 --- a/js/react_native/e2e/ios/MNISTDataHandler.mm +++ b/js/react_native/e2e/ios/MNISTDataHandler.mm @@ -46,10 +46,7 @@ @implementation MNISTDataHandler // It gets raw input data, which can be uri or byte array and others, // returns cooked data formatted as input of a model. 
-RCT_EXPORT_METHOD(preprocess - : (NSString*)uri resolve - : (RCTPromiseResolveBlock)resolve reject - : (RCTPromiseRejectBlock)reject) { +RCT_EXPORT_METHOD(preprocess : (NSString*)uri resolve : (RCTPromiseResolveBlock)resolve reject : (RCTPromiseRejectBlock)reject) { @try { NSDictionary* inputDataMap = [self preprocess:uri]; resolve(inputDataMap); @@ -60,10 +57,7 @@ @implementation MNISTDataHandler // It gets a result from onnxruntime and a duration of session time for input data, // returns output data formatted as React Native map. -RCT_EXPORT_METHOD(postprocess - : (NSDictionary*)result resolve - : (RCTPromiseResolveBlock)resolve reject - : (RCTPromiseRejectBlock)reject) { +RCT_EXPORT_METHOD(postprocess : (NSDictionary*)result resolve : (RCTPromiseResolveBlock)resolve reject : (RCTPromiseRejectBlock)reject) { @try { NSDictionary* cookedMap = [self postprocess:result]; resolve(cookedMap); diff --git a/js/react_native/e2e/ios/OnnxruntimeModuleExample.xcodeproj/project.pbxproj b/js/react_native/e2e/ios/OnnxruntimeModuleExample.xcodeproj/project.pbxproj index 7200461126c35..b8c9d9ab90cc1 100644 --- a/js/react_native/e2e/ios/OnnxruntimeModuleExample.xcodeproj/project.pbxproj +++ b/js/react_native/e2e/ios/OnnxruntimeModuleExample.xcodeproj/project.pbxproj @@ -3,18 +3,20 @@ archiveVersion = 1; classes = { }; - objectVersion = 54; + objectVersion = 46; objects = { /* Begin PBXBuildFile section */ 13B07FBC1A68108700A75B9A /* AppDelegate.m in Sources */ = {isa = PBXBuildFile; fileRef = 13B07FB01A68108700A75B9A /* AppDelegate.m */; }; 13B07FBF1A68108700A75B9A /* Images.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = 13B07FB51A68108700A75B9A /* Images.xcassets */; }; 13B07FC11A68108700A75B9A /* main.m in Sources */ = {isa = PBXBuildFile; fileRef = 13B07FB71A68108700A75B9A /* main.m */; }; + 81411D106EB3E14586DBF352 /* libPods-OnnxruntimeModuleExample.a in Frameworks */ = {isa = PBXBuildFile; fileRef = A98DB3380F37BDA06AFF9005 /* libPods-OnnxruntimeModuleExample.a */; }; 81AB9BB82411601600AC10FF /* LaunchScreen.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = 81AB9BB72411601600AC10FF /* LaunchScreen.storyboard */; }; DB61BA27278684FB0096C971 /* OnnxruntimeModuleExampleUITests.m in Sources */ = {isa = PBXBuildFile; fileRef = DB61BA26278684FB0096C971 /* OnnxruntimeModuleExampleUITests.m */; }; DBA8BA87267293C4008CC55A /* mnist.ort in Resources */ = {isa = PBXBuildFile; fileRef = DBA8BA86267293C4008CC55A /* mnist.ort */; }; DBBF7412263B8C7100487C77 /* MNISTDataHandler.mm in Sources */ = {isa = PBXBuildFile; fileRef = DBBF7411263B8C7100487C77 /* MNISTDataHandler.mm */; }; DBBF7414263B8CCB00487C77 /* 3.jpg in Resources */ = {isa = PBXBuildFile; fileRef = DBBF7413263B8CCB00487C77 /* 3.jpg */; }; + E329E1162D3728940016B599 /* PrivacyInfo.xcprivacy in Resources */ = {isa = PBXBuildFile; fileRef = E329E1142D3728940016B599 /* PrivacyInfo.xcprivacy */; }; /* End PBXBuildFile section */ /* Begin PBXContainerItemProxy section */ @@ -49,12 +51,16 @@ 13B07FB61A68108700A75B9A /* Info.plist */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.plist.xml; name = Info.plist; path = OnnxruntimeModuleExample/Info.plist; sourceTree = ""; }; 13B07FB71A68108700A75B9A /* main.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; name = main.m; path = OnnxruntimeModuleExample/main.m; sourceTree = ""; }; 81AB9BB72411601600AC10FF /* LaunchScreen.storyboard */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = file.storyboard; name = 
diff --git a/js/react_native/e2e/ios/OnnxruntimeModuleExample.xcodeproj/project.pbxproj b/js/react_native/e2e/ios/OnnxruntimeModuleExample.xcodeproj/project.pbxproj
index 7200461126c35..b8c9d9ab90cc1 100644
--- a/js/react_native/e2e/ios/OnnxruntimeModuleExample.xcodeproj/project.pbxproj
+++ b/js/react_native/e2e/ios/OnnxruntimeModuleExample.xcodeproj/project.pbxproj
@@ -3,18 +3,20 @@
 	archiveVersion = 1;
 	classes = {
 	};
-	objectVersion = 54;
+	objectVersion = 46;
 	objects = {
/* Begin PBXBuildFile section */
 		13B07FBC1A68108700A75B9A /* AppDelegate.m in Sources */ = {isa = PBXBuildFile; fileRef = 13B07FB01A68108700A75B9A /* AppDelegate.m */; };
 		13B07FBF1A68108700A75B9A /* Images.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = 13B07FB51A68108700A75B9A /* Images.xcassets */; };
 		13B07FC11A68108700A75B9A /* main.m in Sources */ = {isa = PBXBuildFile; fileRef = 13B07FB71A68108700A75B9A /* main.m */; };
+		81411D106EB3E14586DBF352 /* libPods-OnnxruntimeModuleExample.a in Frameworks */ = {isa = PBXBuildFile; fileRef = A98DB3380F37BDA06AFF9005 /* libPods-OnnxruntimeModuleExample.a */; };
 		81AB9BB82411601600AC10FF /* LaunchScreen.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = 81AB9BB72411601600AC10FF /* LaunchScreen.storyboard */; };
 		DB61BA27278684FB0096C971 /* OnnxruntimeModuleExampleUITests.m in Sources */ = {isa = PBXBuildFile; fileRef = DB61BA26278684FB0096C971 /* OnnxruntimeModuleExampleUITests.m */; };
 		DBA8BA87267293C4008CC55A /* mnist.ort in Resources */ = {isa = PBXBuildFile; fileRef = DBA8BA86267293C4008CC55A /* mnist.ort */; };
 		DBBF7412263B8C7100487C77 /* MNISTDataHandler.mm in Sources */ = {isa = PBXBuildFile; fileRef = DBBF7411263B8C7100487C77 /* MNISTDataHandler.mm */; };
 		DBBF7414263B8CCB00487C77 /* 3.jpg in Resources */ = {isa = PBXBuildFile; fileRef = DBBF7413263B8CCB00487C77 /* 3.jpg */; };
+		E329E1162D3728940016B599 /* PrivacyInfo.xcprivacy in Resources */ = {isa = PBXBuildFile; fileRef = E329E1142D3728940016B599 /* PrivacyInfo.xcprivacy */; };
/* End PBXBuildFile section */
/* Begin PBXContainerItemProxy section */
@@ -49,12 +51,16 @@
 		13B07FB61A68108700A75B9A /* Info.plist */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.plist.xml; name = Info.plist; path = OnnxruntimeModuleExample/Info.plist; sourceTree = "<group>"; };
 		13B07FB71A68108700A75B9A /* main.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; name = main.m; path = OnnxruntimeModuleExample/main.m; sourceTree = "<group>"; };
 		81AB9BB72411601600AC10FF /* LaunchScreen.storyboard */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = file.storyboard; name = LaunchScreen.storyboard; path = OnnxruntimeModuleExample/LaunchScreen.storyboard; sourceTree = "<group>"; };
+		9D58C0FCCF00905433F4ED74 /* Pods-OnnxruntimeModuleExample.debug.xcconfig */ = {isa = PBXFileReference; includeInIndex = 1; lastKnownFileType = text.xcconfig; name = "Pods-OnnxruntimeModuleExample.debug.xcconfig"; path = "Target Support Files/Pods-OnnxruntimeModuleExample/Pods-OnnxruntimeModuleExample.debug.xcconfig"; sourceTree = "<group>"; };
+		A98DB3380F37BDA06AFF9005 /* libPods-OnnxruntimeModuleExample.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = "libPods-OnnxruntimeModuleExample.a"; sourceTree = BUILT_PRODUCTS_DIR; };
+		B70FCE6DFAB320E9051DA321 /* Pods-OnnxruntimeModuleExample.release.xcconfig */ = {isa = PBXFileReference; includeInIndex = 1; lastKnownFileType = text.xcconfig; name = "Pods-OnnxruntimeModuleExample.release.xcconfig"; path = "Target Support Files/Pods-OnnxruntimeModuleExample/Pods-OnnxruntimeModuleExample.release.xcconfig"; sourceTree = "<group>"; };
 		DB61BA24278684FB0096C971 /* OnnxruntimeModuleExampleUITests.xctest */ = {isa = PBXFileReference; explicitFileType = wrapper.cfbundle; includeInIndex = 0; path = OnnxruntimeModuleExampleUITests.xctest; sourceTree = BUILT_PRODUCTS_DIR; };
 		DB61BA26278684FB0096C971 /* OnnxruntimeModuleExampleUITests.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = OnnxruntimeModuleExampleUITests.m; sourceTree = "<group>"; };
 		DBA8BA86267293C4008CC55A /* mnist.ort */ = {isa = PBXFileReference; lastKnownFileType = file; name = mnist.ort; path = ../src/mnist.ort; sourceTree = "<group>"; };
 		DBBF7410263B8C5F00487C77 /* MNISTDataHandler.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = MNISTDataHandler.h; sourceTree = "<group>"; };
 		DBBF7411263B8C7100487C77 /* MNISTDataHandler.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = MNISTDataHandler.mm; sourceTree = "<group>"; };
 		DBBF7413263B8CCB00487C77 /* 3.jpg */ = {isa = PBXFileReference; lastKnownFileType = image.jpeg; name = 3.jpg; path = ../src/3.jpg; sourceTree = "<group>"; };
+		E329E1142D3728940016B599 /* PrivacyInfo.xcprivacy */ = {isa = PBXFileReference; lastKnownFileType = text.xml; path = PrivacyInfo.xcprivacy; sourceTree = "<group>"; };
 		ED297162215061F000B7C4FE /* JavaScriptCore.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = JavaScriptCore.framework; path = System/Library/Frameworks/JavaScriptCore.framework; sourceTree = SDKROOT; };
/* End PBXFileReference section */
@@ -63,6 +69,7 @@
 			isa = PBXFrameworksBuildPhase;
 			buildActionMask = 2147483647;
 			files = (
+				81411D106EB3E14586DBF352 /* libPods-OnnxruntimeModuleExample.a in Frameworks */,
 			);
 			runOnlyForDeploymentPostprocessing = 0;
 		};
@@ -79,6 +86,7 @@
 		13B07FAE1A68108700A75B9A /* OnnxruntimeModuleExample */ = {
 			isa = PBXGroup;
 			children = (
+				E329E1142D3728940016B599 /* PrivacyInfo.xcprivacy */,
 				DBBF7411263B8C7100487C77 /* MNISTDataHandler.mm */,
 				DBBF7410263B8C5F00487C77 /* MNISTDataHandler.h */,
 				008F07F21AC5B25A0029DE68 /* main.jsbundle */,
@@ -96,6 +104,7 @@
 			isa = PBXGroup;
 			children = (
 				ED297162215061F000B7C4FE /* JavaScriptCore.framework */,
+				A98DB3380F37BDA06AFF9005 /* libPods-OnnxruntimeModuleExample.a */,
 			);
 			name = Frameworks;
 			sourceTree = "<group>";
@@ -103,6 +112,8 @@
 		6B9684456A2045ADE5A6E47E /* Pods */ = {
 			isa = PBXGroup;
 			children = (
+				9D58C0FCCF00905433F4ED74 /* Pods-OnnxruntimeModuleExample.debug.xcconfig */,
+				B70FCE6DFAB320E9051DA321 /* Pods-OnnxruntimeModuleExample.release.xcconfig */,
 			);
 			path = Pods;
 			sourceTree = "<group>";
@@ -155,12 +166,14 @@
 			isa = PBXNativeTarget;
 			buildConfigurationList = 13B07F931A680F5B00A75B9A /* Build configuration list for PBXNativeTarget "OnnxruntimeModuleExample" */;
 			buildPhases = (
+				FF1F546E251E9524E4930013 /* [CP] Check Pods Manifest.lock */,
 				FD10A7F022414F080027D42C /* Start Packager */,
 				13B07F871A680F5B00A75B9A /* Sources */,
 				13B07F8C1A680F5B00A75B9A /* Frameworks */,
 				13B07F8E1A680F5B00A75B9A /* Resources */,
 				00DD1BFF1BD5951E006B06BC /* Bundle React Native code and images */,
 				DB8FCD9C25C3404B00C72F26 /* Embed Libraries */,
+				9BBEFBEFBEE7FC814F312449 /* [CP] Copy Pods Resources */,
 			);
 			buildRules = (
 			);
@@ -234,6 +247,7 @@
 				DBA8BA87267293C4008CC55A /* mnist.ort in Resources */,
 				DBBF7414263B8CCB00487C77 /* 3.jpg in Resources */,
 				81AB9BB82411601600AC10FF /* LaunchScreen.storyboard in Resources */,
+				E329E1162D3728940016B599 /* PrivacyInfo.xcprivacy in Resources */,
 				13B07FBF1A68108700A75B9A /* Images.xcassets in Resources */,
 			);
 			runOnlyForDeploymentPostprocessing = 0;
@@ -257,10 +271,29 @@
 			);
 			name = "Bundle React Native code and images";
 			outputPaths = (
+				"$(CONFIGURATION_BUILD_DIR)/$(UNLOCALIZED_RESOURCES_FOLDER_PATH)/main.jsbundle",
 			);
 			runOnlyForDeploymentPostprocessing = 0;
 			shellPath = /bin/sh;
-			shellScript = "set -e\n\nexport NODE_BINARY=node\nexport PROJECT_ROOT=$PWD/..\nexport ENTRY_FILE=index.tsx\n../node_modules/react-native/scripts/react-native-xcode.sh\n";
+			shellScript = "if [ \"$CONFIGURATION\" == \"Release\" ]; then\n export NODE_BINARY=$(which node)\n export ENTRY_FILE=\"index.tsx\"\n export EXTRA_PACKAGER_ARGS=\"--reset-cache\"\n\n ../node_modules/react-native/scripts/react-native-xcode.sh\n echo \"copying bundle file from $CONFIGURATION_BUILD_DIR/main.jsbundle to $CONFIGURATION_BUILD_DIR/$UNLOCALIZED_RESOURCES_FOLDER_PATH/\"\n echo \"This cp might not be needed post 0.7.15 because it is a bug from facebook\"\n cp $CONFIGURATION_BUILD_DIR/main.jsbundle $CONFIGURATION_BUILD_DIR/$UNLOCALIZED_RESOURCES_FOLDER_PATH/\nfi\n";
+		};
+		9BBEFBEFBEE7FC814F312449 /* [CP] Copy Pods Resources */ = {
+			isa = PBXShellScriptBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+			);
+			inputPaths = (
+				"${PODS_ROOT}/Target Support Files/Pods-OnnxruntimeModuleExample/Pods-OnnxruntimeModuleExample-resources.sh",
+				"${PODS_CONFIGURATION_BUILD_DIR}/React-Core/AccessibilityResources.bundle",
+			);
+			name = "[CP] Copy Pods Resources";
+			outputPaths = (
+				"${TARGET_BUILD_DIR}/${UNLOCALIZED_RESOURCES_FOLDER_PATH}/AccessibilityResources.bundle",
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+			shellPath = /bin/sh;
+			shellScript = "\"${PODS_ROOT}/Target Support Files/Pods-OnnxruntimeModuleExample/Pods-OnnxruntimeModuleExample-resources.sh\"\n";
+			showEnvVarsInLog = 0;
+		};
 		FD10A7F022414F080027D42C /* Start Packager */ = {
 			isa = PBXShellScriptBuildPhase;
@@ -281,6 +314,28 @@
 			shellScript = "export RCT_METRO_PORT=\"${RCT_METRO_PORT:=8081}\"\necho \"export RCT_METRO_PORT=${RCT_METRO_PORT}\" > \"${SRCROOT}/../node_modules/react-native/scripts/.packager.env\"\nif [ -z \"${RCT_NO_LAUNCH_PACKAGER+xxx}\" ] ; then\n if nc -w 5 -z localhost ${RCT_METRO_PORT} ; then\n if ! curl -s \"http://localhost:${RCT_METRO_PORT}/status\" | grep -q \"packager-status:running\" ; then\n echo \"Port ${RCT_METRO_PORT} already in use, packager is either not running or not running correctly\"\n exit 2\n fi\n else\n open \"$SRCROOT/../node_modules/react-native/scripts/launchPackager.command\" || echo \"Can't start packager automatically\"\n fi\nfi\n";
curl -s \"http://localhost:${RCT_METRO_PORT}/status\" | grep -q \"packager-status:running\" ; then\n echo \"Port ${RCT_METRO_PORT} already in use, packager is either not running or not running correctly\"\n exit 2\n fi\n else\n open \"$SRCROOT/../node_modules/react-native/scripts/launchPackager.command\" || echo \"Can't start packager automatically\"\n fi\nfi\n"; showEnvVarsInLog = 0; }; + FF1F546E251E9524E4930013 /* [CP] Check Pods Manifest.lock */ = { + isa = PBXShellScriptBuildPhase; + buildActionMask = 2147483647; + files = ( + ); + inputFileListPaths = ( + ); + inputPaths = ( + "${PODS_PODFILE_DIR_PATH}/Podfile.lock", + "${PODS_ROOT}/Manifest.lock", + ); + name = "[CP] Check Pods Manifest.lock"; + outputFileListPaths = ( + ); + outputPaths = ( + "$(DERIVED_FILE_DIR)/Pods-OnnxruntimeModuleExample-checkManifestLockResult.txt", + ); + runOnlyForDeploymentPostprocessing = 0; + shellPath = /bin/sh; + shellScript = "diff \"${PODS_PODFILE_DIR_PATH}/Podfile.lock\" \"${PODS_ROOT}/Manifest.lock\" > /dev/null\nif [ $? != 0 ] ; then\n # print error to STDERR\n echo \"error: The sandbox is not in sync with the Podfile.lock. Run 'pod install' or update your CocoaPods installation.\" >&2\n exit 1\nfi\n# This output is used by Xcode 'outputs' to avoid re-running this script phase.\necho \"SUCCESS\" > \"${SCRIPT_OUTPUT_FILE_0}\"\n"; + showEnvVarsInLog = 0; + }; /* End PBXShellScriptBuildPhase section */ /* Begin PBXSourcesBuildPhase section */ @@ -315,6 +370,7 @@ /* Begin XCBuildConfiguration section */ 13B07F941A680F5B00A75B9A /* Debug */ = { isa = XCBuildConfiguration; + baseConfigurationReference = 9D58C0FCCF00905433F4ED74 /* Pods-OnnxruntimeModuleExample.debug.xcconfig */; buildSettings = { ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon; CLANG_ENABLE_MODULES = YES; @@ -339,6 +395,7 @@ }; 13B07F951A680F5B00A75B9A /* Release */ = { isa = XCBuildConfiguration; + baseConfigurationReference = B70FCE6DFAB320E9051DA321 /* Pods-OnnxruntimeModuleExample.release.xcconfig */; buildSettings = { ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon; CLANG_ENABLE_MODULES = YES; @@ -363,8 +420,9 @@ isa = XCBuildConfiguration; buildSettings = { ALWAYS_SEARCH_USER_PATHS = NO; + ARCHS = "$(ARCHS_STANDARD)"; CLANG_ANALYZER_LOCALIZABILITY_NONLOCALIZED = YES; - CLANG_CXX_LANGUAGE_STANDARD = "gnu++0x"; + CLANG_CXX_LANGUAGE_STANDARD = "c++17"; CLANG_CXX_LIBRARY = "libc++"; CLANG_ENABLE_MODULES = YES; CLANG_ENABLE_OBJC_ARC = YES; @@ -391,6 +449,7 @@ COPY_PHASE_STRIP = NO; ENABLE_STRICT_OBJC_MSGSEND = YES; ENABLE_TESTABILITY = YES; + "EXCLUDED_ARCHS[sdk=iphonesimulator*]" = ""; GCC_C_LANGUAGE_STANDARD = gnu99; GCC_DYNAMIC_NO_PIC = NO; GCC_NO_COMMON_BLOCKS = YES; @@ -398,6 +457,7 @@ GCC_PREPROCESSOR_DEFINITIONS = ( "DEBUG=1", "$(inherited)", + _LIBCPP_ENABLE_CXX17_REMOVED_UNARY_BINARY_FUNCTION, ); GCC_SYMBOLS_PRIVATE_EXTERN = NO; GCC_WARN_64_TO_32_BIT_CONVERSION = YES; @@ -410,12 +470,14 @@ IPHONEOS_DEPLOYMENT_TARGET = 15.1; LD_RUNPATH_SEARCH_PATHS = "/usr/lib/swift $(inherited)"; LIBRARY_SEARCH_PATHS = ( + "$(SDKROOT)/usr/lib/swift", "\"$(TOOLCHAIN_DIR)/usr/lib/swift/$(PLATFORM_NAME)\"", - "\"$(TOOLCHAIN_DIR)/usr/lib/swift-5.0/$(PLATFORM_NAME)\"", "\"$(inherited)\"", ); MTL_ENABLE_DEBUG_INFO = YES; ONLY_ACTIVE_ARCH = YES; + OTHER_LDFLAGS = "$(inherited)"; + REACT_NATIVE_PATH = "${PODS_ROOT}/../../node_modules/react-native"; SDKROOT = iphoneos; }; name = Debug; @@ -424,8 +486,9 @@ isa = XCBuildConfiguration; buildSettings = { ALWAYS_SEARCH_USER_PATHS = NO; + ARCHS = "$(ARCHS_STANDARD)"; CLANG_ANALYZER_LOCALIZABILITY_NONLOCALIZED = 
-				CLANG_CXX_LANGUAGE_STANDARD = "gnu++0x";
+				CLANG_CXX_LANGUAGE_STANDARD = "c++17";
 				CLANG_CXX_LIBRARY = "libc++";
 				CLANG_ENABLE_MODULES = YES;
 				CLANG_ENABLE_OBJC_ARC = YES;
@@ -452,8 +515,13 @@
 				COPY_PHASE_STRIP = YES;
 				ENABLE_NS_ASSERTIONS = NO;
 				ENABLE_STRICT_OBJC_MSGSEND = YES;
+				"EXCLUDED_ARCHS[sdk=iphonesimulator*]" = "";
 				GCC_C_LANGUAGE_STANDARD = gnu99;
 				GCC_NO_COMMON_BLOCKS = YES;
+				GCC_PREPROCESSOR_DEFINITIONS = (
+					"$(inherited)",
+					_LIBCPP_ENABLE_CXX17_REMOVED_UNARY_BINARY_FUNCTION,
+				);
 				GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
 				GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
 				GCC_WARN_UNDECLARED_SELECTOR = YES;
@@ -463,11 +531,13 @@
 				IPHONEOS_DEPLOYMENT_TARGET = 15.1;
 				LD_RUNPATH_SEARCH_PATHS = "/usr/lib/swift $(inherited)";
 				LIBRARY_SEARCH_PATHS = (
+					"$(SDKROOT)/usr/lib/swift",
 					"\"$(TOOLCHAIN_DIR)/usr/lib/swift/$(PLATFORM_NAME)\"",
-					"\"$(TOOLCHAIN_DIR)/usr/lib/swift-5.0/$(PLATFORM_NAME)\"",
 					"\"$(inherited)\"",
 				);
 				MTL_ENABLE_DEBUG_INFO = NO;
+				OTHER_LDFLAGS = "$(inherited)";
+				REACT_NATIVE_PATH = "${PODS_ROOT}/../../node_modules/react-native";
 				SDKROOT = iphoneos;
 				VALIDATE_PRODUCT = YES;
 			};
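Taken together, the project changes above move the example to C++17, let the Pods xcconfig files drive each build configuration, and make the "Bundle React Native code and images" phase produce main.jsbundle only for Release builds; Debug builds keep loading JS from the Metro packager started by the "Start Packager" phase. One way to observe which path a given build took is to inspect React Native's SourceCode native module from TypeScript. A hedged sketch, assuming only the documented scriptURL field; the file name and log format are illustrative:

// bundleSource.ts - report whether JS came from Metro or from main.jsbundle.
import { NativeModules } from 'react-native';

// scriptURL is an http(s) URL when Metro serves the bundle (Debug builds)
// and a file:// path inside the app package when main.jsbundle is embedded.
const scriptURL: string | undefined = NativeModules.SourceCode?.scriptURL;

export const loadedFromMetro: boolean = scriptURL?.startsWith('http') ?? false;

console.log('bundle source:', scriptURL, 'via Metro:', loadedFromMetro);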
diff --git a/js/react_native/e2e/ios/Podfile b/js/react_native/e2e/ios/Podfile
index 5263e585b0a12..78886dd3541e2 100644
--- a/js/react_native/e2e/ios/Podfile
+++ b/js/react_native/e2e/ios/Podfile
@@ -3,6 +3,14 @@ require_relative '../node_modules/@react-native-community/cli-platform-ios/nativ
 platform :ios, '15.1'
+prepare_react_native_project!
+
+linkage = ENV['USE_FRAMEWORKS']
+if linkage != nil
+  Pod::UI.puts "Configuring Pod with #{linkage}ally linked Frameworks".green
+  use_frameworks! :linkage => linkage.to_sym
+end
+
 pre_install do |installer|
   # Custom pre-install script or commands
   puts "Running pre-install script..."
@@ -15,9 +23,18 @@ end
 target 'OnnxruntimeModuleExample' do
   config = use_native_modules!
-  use_react_native!(:path => config["reactNativePath"])
+  use_react_native!(
+    :path => config[:reactNativePath],
+
+    # Hermes is now enabled by default. Disable by setting this flag to false.
+    # Upcoming versions of React Native may rely on get_default_flags(), but
+    # we make it explicit here to aid in the React Native upgrade process.
+
+    :hermes_enabled => false,
+#   :flipper_configuration => FlipperConfiguration.enabled,
+    :app_path => "#{Pod::Config.instance.installation_root}/.."
+  )
-  use_frameworks!
   ort_c_local_pod_path = ENV['ORT_C_LOCAL_POD_PATH']
   if ort_c_local_pod_path != nil
@@ -27,13 +44,14 @@ target 'OnnxruntimeModuleExample' do
   pod 'onnxruntime-react-native', :path => '../node_modules/onnxruntime-react-native'
   inherit! :search_paths
-end
-post_install do |installer|
-  installer.generated_projects.each do |project|
-    project.targets.each do |target|
-      target.build_configurations.each do |config|
-        config.build_settings['IPHONEOS_DEPLOYMENT_TARGET'] = '15.1'
-      end
-    end
-  end
+  post_install do |installer|
+    react_native_post_install(
+      installer,
+      config[:reactNativePath],
+      # Set `mac_catalyst_enabled` to `true` in order to apply patches
+      # necessary for Mac Catalyst builds
+      :mac_catalyst_enabled => false,
+    )
+    __apply_Xcode_12_5_M1_post_install_workaround(installer)
+  end
 end
diff --git a/js/react_native/e2e/ios/PrivacyInfo.xcprivacy b/js/react_native/e2e/ios/PrivacyInfo.xcprivacy
new file mode 100644
index 0000000000000..549cd5d8b600f
--- /dev/null
+++ b/js/react_native/e2e/ios/PrivacyInfo.xcprivacy
@@ -0,0 +1,37 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+  <key>NSPrivacyCollectedDataTypes</key>
+  <array/>
+  <key>NSPrivacyAccessedAPITypes</key>
+  <array>
+    <dict>
+      <key>NSPrivacyAccessedAPIType</key>
+      <string>NSPrivacyAccessedAPICategoryFileTimestamp</string>
+      <key>NSPrivacyAccessedAPITypeReasons</key>
+      <array>
+        <string>C617.1</string>
+      </array>
+    </dict>
+    <dict>
+      <key>NSPrivacyAccessedAPIType</key>
+      <string>NSPrivacyAccessedAPICategoryUserDefaults</string>
+      <key>NSPrivacyAccessedAPITypeReasons</key>
+      <array>
+        <string>CA92.1</string>
+      </array>
+    </dict>
+    <dict>
+      <key>NSPrivacyAccessedAPIType</key>
+      <string>NSPrivacyAccessedAPICategorySystemBootTime</string>
+      <key>NSPrivacyAccessedAPITypeReasons</key>
+      <array>
+        <string>35F9.1</string>
+      </array>
+    </dict>
+  </array>
+  <key>NSPrivacyTracking</key>
+  <false/>
+</dict>
+</plist>
diff --git a/js/react_native/e2e/ios/main.jsbundle b/js/react_native/e2e/ios/main.jsbundle
deleted file mode 100644
index 298ce609bb9a3..0000000000000
--- a/js/react_native/e2e/ios/main.jsbundle
+++ /dev/null
@@ -1,404 +0,0 @@
-var __BUNDLE_START_TIME__=this.nativePerformanceNow?nativePerformanceNow():Date.now(),__DEV__=false,process=this.process||{};process.env=process.env||{};process.env.NODE_ENV=process.env.NODE_ENV||"production";
-!(function(r){"use strict";r.__r=o,r.__d=function(r,i,n){if(null!=e[i])return;var o={dependencyMap:n,factory:r,hasError:!1,importedAll:t,importedDefault:t,isInitialized:!1,publicModule:{exports:{}}};e[i]=o},r.__c=n,r.__registerSegment=function(r,e){s[r]=e};var e=n(),t={},i={}.hasOwnProperty;function n(){return e=Object.create(null)}function o(r){var t=r,i=e[t];return i&&i.isInitialized?i.publicModule.exports:d(t,i)}function l(r){var i=r;if(e[i]&&e[i].importedDefault!==t)return e[i].importedDefault;var n=o(i),l=n&&n.__esModule?n.default:n;return e[i].importedDefault=l}function u(r){var n=r;if(e[n]&&e[n].importedAll!==t)return e[n].importedAll;var l,u=o(n);if(u&&u.__esModule)l=u;else{if(l={},u)for(var a in u)i.call(u,a)&&(l[a]=u[a]);l.default=u}return e[n].importedAll=l}o.importDefault=l,o.importAll=u;var a=!1;function d(e,t){if(!a&&r.ErrorUtils){var i;a=!0;try{i=v(e,t)}catch(e){r.ErrorUtils.reportFatalError(e)}return a=!1,i}return v(e,t)}var c=16,f=65535;function p(r){return{segmentId:r>>>c,localId:r&f}}o.unpackModuleId=p,o.packModuleId=function(r){return(r.segmentId<0){var n=p(t),a=n.segmentId,d=n.localId,c=s[a];null!=c&&(c(d),i=e[t])}var f=r.nativeRequire;if(!i&&f){var v=p(t),h=v.segmentId;f(v.localId,h),i=e[t]}if(!i)throw Error('Requiring unknown module "'+t+'".');if(i.hasError)throw m(t,i.error);i.isInitialized=!0;var I=i,g=I.factory,y=I.dependencyMap;try{var _=i.publicModule;return _.id=t,g(r,o,l,u,_,_.exports,y),i.factory=void 0,i.dependencyMap=void 0,_.exports}catch(r){throw i.hasError=!0,i.error=r,i.isInitialized=!1,i.publicModule.exports=void 0,r}}function m(r,e){return Error('Requiring module "'+r+'", which threw an exception: '+e)}})('undefined'!=typeof globalThis?globalThis:'undefined'!=typeof global?global:'undefined'!=typeof window?window:this);
-!(function(n){var e=(function(){function 
n(n,e){return n}function e(n){var e={};return n.forEach(function(n,r){e[n]=!0}),e}function r(n,r,u){if(n.formatValueCalls++,n.formatValueCalls>200)return"[TOO BIG formatValueCalls "+n.formatValueCalls+" exceeded limit of 200]";var f=t(n,r);if(f)return f;var c=Object.keys(r),s=e(c);if(d(r)&&(c.indexOf('message')>=0||c.indexOf('description')>=0))return o(r);if(0===c.length){if(v(r)){var g=r.name?': '+r.name:'';return n.stylize('[Function'+g+']','special')}if(p(r))return n.stylize(RegExp.prototype.toString.call(r),'regexp');if(y(r))return n.stylize(Date.prototype.toString.call(r),'date');if(d(r))return o(r)}var h,b,m='',j=!1,O=['{','}'];(h=r,Array.isArray(h)&&(j=!0,O=['[',']']),v(r))&&(m=' [Function'+(r.name?': '+r.name:'')+']');return p(r)&&(m=' '+RegExp.prototype.toString.call(r)),y(r)&&(m=' '+Date.prototype.toUTCString.call(r)),d(r)&&(m=' '+o(r)),0!==c.length||j&&0!=r.length?u<0?p(r)?n.stylize(RegExp.prototype.toString.call(r),'regexp'):n.stylize('[Object]','special'):(n.seen.push(r),b=j?i(n,r,u,s,c):c.map(function(e){return l(n,r,u,s,e,j)}),n.seen.pop(),a(b,m,O)):O[0]+m+O[1]}function t(n,e){if(s(e))return n.stylize('undefined','undefined');if('string'==typeof e){var r="'"+JSON.stringify(e).replace(/^"|"$/g,'').replace(/'/g,"\\'").replace(/\\"/g,'"')+"'";return n.stylize(r,'string')}return c(e)?n.stylize(''+e,'number'):u(e)?n.stylize(''+e,'boolean'):f(e)?n.stylize('null','null'):void 0}function o(n){return'['+Error.prototype.toString.call(n)+']'}function i(n,e,r,t,o){for(var i=[],a=0,u=e.length;a-1&&(u=l?u.split('\n').map(function(n){return' '+n}).join('\n').substr(2):'\n'+u.split('\n').map(function(n){return' '+n}).join('\n')):u=n.stylize('[Circular]','special')),s(a)){if(l&&i.match(/^\d+$/))return u;(a=JSON.stringify(''+i)).match(/^"([a-zA-Z_][a-zA-Z_0-9]*)"$/)?(a=a.substr(1,a.length-2),a=n.stylize(a,'name')):(a=a.replace(/'/g,"\\'").replace(/\\"/g,'"').replace(/(^"|"$)/g,"'"),a=n.stylize(a,'string'))}return a+': '+u}function a(n,e,r){return n.reduce(function(n,e){return 0,e.indexOf('\n')>=0&&0,n+e.replace(/\u001b\[\d\d?m/g,'').length+1},0)>60?r[0]+(''===e?'':e+'\n ')+' '+n.join(',\n ')+' '+r[1]:r[0]+e+' '+n.join(', ')+' '+r[1]}function u(n){return'boolean'==typeof n}function f(n){return null===n}function c(n){return'number'==typeof n}function s(n){return void 0===n}function p(n){return g(n)&&'[object RegExp]'===h(n)}function g(n){return'object'==typeof n&&null!==n}function y(n){return g(n)&&'[object Date]'===h(n)}function d(n){return g(n)&&('[object Error]'===h(n)||n instanceof Error)}function v(n){return'function'==typeof n}function h(n){return Object.prototype.toString.call(n)}function b(n,e){return Object.prototype.hasOwnProperty.call(n,e)}return function(e,t){return r({seen:[],formatValueCalls:0,stylize:n},e,t.depth)}})(),r='(index)',t={trace:0,info:1,warn:2,error:3},o=[];o[t.trace]='debug',o[t.info]='log',o[t.warn]='warning',o[t.error]='error';var i=1;function l(r){return function(){var l;l=1===arguments.length&&'string'==typeof arguments[0]?arguments[0]:Array.prototype.map.call(arguments,function(n){return e(n,{depth:10})}).join(', ');var a=arguments[0],u=r;'string'==typeof a&&'Warning: '===a.slice(0,9)&&u>=t.error&&(u=t.warn),n.__inspectorLog&&n.__inspectorLog(o[u],l,[].slice.call(arguments),i),s.length&&(l=p('',l)),n.nativeLoggingHook(l,u)}}function a(n,e){return Array.apply(null,Array(e)).map(function(){return n})}var u="\u2502",f="\u2510",c="\u2518",s=[];function p(n,e){return s.join('')+n+' 
'+(e||'')}if(n.nativeLoggingHook){n.console;n.console={error:l(t.error),info:l(t.info),log:l(t.info),warn:l(t.warn),trace:l(t.trace),debug:l(t.trace),table:function(e){if(!Array.isArray(e)){var o=e;for(var i in e=[],o)if(o.hasOwnProperty(i)){var l=o[i];l[r]=i,e.push(l)}}if(0!==e.length){var u=Object.keys(e[0]).sort(),f=[],c=[];u.forEach(function(n,r){c[r]=n.length;for(var t=0;t';return function(){for(var r=arguments.length,u=new Array(r),e=0;e0&&c.createElement(r(d[13]).Text,null,null==u?void 0:u.edges.length," photos fetched"),null!==o&&c.createElement(r(d[13]).Text,null,"Time taken: ",o," ms"),null!==l&&c.createElement(r(d[13]).Text,null,"Inference time taken: ",l," ms"),c.createElement(r(d[13]).ScrollView,null,null==h?void 0:h.map(function(t){return c.createElement(r(d[13]).View,{style:b.itemContainer},c.createElement(r(d[13]).Image,{source:{uri:f},style:b.image}),c.createElement(r(d[13]).Text,{style:b.textInput},t.classified,"--------------------------------\n","Preprocess time taken: ",t.duration.preprocess," ms ","\n","Inference time taken: ",t.duration.inference," ms ","\n","Postprocess time taken: ",t.duration.postprocess," ms ","\n"))})))}}]),k})(c.PureComponent);e.default=w;var b=r(d[13]).StyleSheet.create({container:{flex:1,padding:8},inputRow:{flexDirection:'row',alignItems:'center',justifyContent:'space-between',paddingVertical:2},textInput:{borderColor:'#ccc',fontSize:17,borderWidth:0,paddingVertical:4,paddingHorizontal:8},error:{color:'#f00'},textInputError:{borderColor:'#f00'},itemContainer:{borderColor:'#ccc',borderWidth:2},image:{flex:1,borderColor:'#ccc',resizeMode:'stretch',width:200,height:200}})},2,[1,3,5,6,7,9,12,13,14,18,389,390,391,21]); -__d(function(g,r,i,a,m,e,d){m.exports=r(d[0])},3,[4]); -__d(function(g,r,i,a,m,e,d){var t=(function(t){"use strict";var n,o=Object.prototype,c=o.hasOwnProperty,u="function"==typeof Symbol?Symbol:{},h=u.iterator||"@@iterator",f=u.asyncIterator||"@@asyncIterator",l=u.toStringTag||"@@toStringTag";function s(t,n,o){return Object.defineProperty(t,n,{value:o,enumerable:!0,configurable:!0,writable:!0}),t[n]}try{s({},"")}catch(t){s=function(t,n,o){return t[n]=o}}function p(t,n,o,c){var u=n&&n.prototype instanceof E?n:E,h=Object.create(u.prototype),f=new R(c||[]);return h._invoke=S(t,o,f),h}function y(t,n,o){try{return{type:"normal",arg:t.call(n,o)}}catch(t){return{type:"throw",arg:t}}}t.wrap=p;var v="suspendedStart",w="suspendedYield",L="executing",x="completed",b={};function E(){}function _(){}function j(){}var O={};O[h]=function(){return this};var k=Object.getPrototypeOf,G=k&&k(k(Y([])));G&&G!==o&&c.call(G,h)&&(O=G);var N=j.prototype=E.prototype=Object.create(O);function F(t){["next","throw","return"].forEach(function(n){s(t,n,function(t){return this._invoke(n,t)})})}function P(t,n){function o(u,h,f,l){var s=y(t[u],t,h);if("throw"!==s.type){var p=s.arg,v=p.value;return v&&"object"==typeof v&&c.call(v,"__await")?n.resolve(v.__await).then(function(t){o("next",t,f,l)},function(t){o("throw",t,f,l)}):n.resolve(v).then(function(t){p.value=t,f(p)},function(t){return o("throw",t,f,l)})}l(s.arg)}var u;this._invoke=function(t,c){function h(){return new n(function(n,u){o(t,c,n,u)})}return u=u?u.then(h,h):h()}}function S(t,n,o){var c=v;return function(u,h){if(c===L)throw new Error("Generator is already running");if(c===x){if("throw"===u)throw h;return q()}for(o.method=u,o.arg=h;;){var f=o.delegate;if(f){var l=T(f,o);if(l){if(l===b)continue;return l}}if("next"===o.method)o.sent=o._sent=o.arg;else if("throw"===o.method){if(c===v)throw 
c=x,o.arg;o.dispatchException(o.arg)}else"return"===o.method&&o.abrupt("return",o.arg);c=L;var s=y(t,n,o);if("normal"===s.type){if(c=o.done?x:w,s.arg===b)continue;return{value:s.arg,done:o.done}}"throw"===s.type&&(c=x,o.method="throw",o.arg=s.arg)}}}function T(t,o){var c=t.iterator[o.method];if(c===n){if(o.delegate=null,"throw"===o.method){if(t.iterator.return&&(o.method="return",o.arg=n,T(t,o),"throw"===o.method))return b;o.method="throw",o.arg=new TypeError("The iterator does not provide a 'throw' method")}return b}var u=y(c,t.iterator,o.arg);if("throw"===u.type)return o.method="throw",o.arg=u.arg,o.delegate=null,b;var h=u.arg;return h?h.done?(o[t.resultName]=h.value,o.next=t.nextLoc,"return"!==o.method&&(o.method="next",o.arg=n),o.delegate=null,b):h:(o.method="throw",o.arg=new TypeError("iterator result is not an object"),o.delegate=null,b)}function I(t){var n={tryLoc:t[0]};1 in t&&(n.catchLoc=t[1]),2 in t&&(n.finallyLoc=t[2],n.afterLoc=t[3]),this.tryEntries.push(n)}function A(t){var n=t.completion||{};n.type="normal",delete n.arg,t.completion=n}function R(t){this.tryEntries=[{tryLoc:"root"}],t.forEach(I,this),this.reset(!0)}function Y(t){if(t){var o=t[h];if(o)return o.call(t);if("function"==typeof t.next)return t;if(!isNaN(t.length)){var u=-1,f=function o(){for(;++u=0;--h){var f=this.tryEntries[h],l=f.completion;if("root"===f.tryLoc)return u("end");if(f.tryLoc<=this.prev){var s=c.call(f,"catchLoc"),p=c.call(f,"finallyLoc");if(s&&p){if(this.prev=0;--o){var u=this.tryEntries[o];if(u.tryLoc<=this.prev&&c.call(u,"finallyLoc")&&this.prev=0;--n){var o=this.tryEntries[n];if(o.finallyLoc===t)return this.complete(o.completion,o.afterLoc),A(o),b}},catch:function(t){for(var n=this.tryEntries.length-1;n>=0;--n){var o=this.tryEntries[n];if(o.tryLoc===t){var c=o.completion;if("throw"===c.type){var u=c.arg;A(o)}return u}}throw new Error("illegal catch attempt")},delegateYield:function(t,o,c){return this.delegate={iterator:Y(t),resultName:o,nextLoc:c},"next"===this.method&&(this.arg=n),b}},t})("object"==typeof m?m.exports:{});try{regeneratorRuntime=t}catch(n){Function("r","regeneratorRuntime = r")(t)}},4,[]); -__d(function(g,r,i,a,m,e,d){m.exports=function(n,o){if(!(n instanceof o))throw new TypeError("Cannot call a class as a function")}},5,[]); -__d(function(g,r,i,a,m,e,d){function n(n,t){for(var o=0;oF.length&&F.push(t)}function D(t,u,f,l){var c=typeof t;"undefined"!==c&&"boolean"!==c||(t=null);var s=!1;if(null===t)s=!0;else switch(c){case"string":case"number":s=!0;break;case"object":switch(t.$$typeof){case n:case o:s=!0}}if(s)return f(l,t,""===u?"."+B(t,0):u),1;if(s=0,u=""===u?".":u+":",Array.isArray(t))for(var p=0;p1&&void 0!==arguments[1]?arguments[1]:{},s=o.type,l=void 0===s?'auto':s,u=o.album,c=void 0===u?'':u;return r(d[5])('string'==typeof t,'CameraRoll.saveToCameraRoll must be a valid string.'),r(d[5])('photo'===o.type||'video'===o.type||'auto'===o.type||void 0===o.type,"The second argument to saveToCameraRoll must be 'photo' or 'video' or 'auto'. You passed "+(l||'unknown')),'auto'===l&&(l=['mov','mp4'].indexOf(t.split('.').slice(-1)[0])>=0?'video':'photo'),n.default.saveToCameraRoll(t,{type:l,album:c})}},{key:"saveToCameraRoll",value:function(o,s){return console.warn('CameraRoll.saveToCameraRoll(tag, type) is deprecated. 
Use the save function instead'),t.save(o,{type:s})}},{key:"getAlbums",value:function(){var t=arguments.length>0&&void 0!==arguments[0]?arguments[0]:{assetType:p.All};return n.default.getAlbums(t)}},{key:"getParamsWithDefaults",value:function(t){var o=u({},t);return o.assetType||(o.assetType=p.All),o.groupTypes||'android'===r(d[6]).Platform.OS||(o.groupTypes=c.All),o}},{key:"getPhotos",value:function(o){o=t.getParamsWithDefaults(o);var s=n.default.getPhotos(o);if(arguments.length>1){console.warn('CameraRoll.getPhotos(tag, success, error) is deprecated. Use the returned Promise instead');var l=arguments[1],u=arguments[2]||function(){};s.then(l,u)}return s}}]),t})();v.GroupTypesOptions=c,v.AssetTypeOptions=p,m.exports=v},18,[1,19,5,6,20,211,21]); -__d(function(g,r,i,a,m,e,d){m.exports=function(n,t,u){return t in n?Object.defineProperty(n,t,{value:u,enumerable:!0,configurable:!0,writable:!0}):n[t]=u,n}},19,[]); -__d(function(g,r,i,a,m,e,d){Object.defineProperty(e,"__esModule",{value:!0}),e.default=void 0;var l=r(d[0]).NativeModules.RNCCameraRoll;e.default=l},20,[21]); -__d(function(g,r,i,a,m,e,d){'use strict';m.exports={get AccessibilityInfo(){return r(d[0])},get ActivityIndicator(){return r(d[1])},get Button(){return r(d[2])},get CheckBox(){return r(d[3])('checkBox-moved',"CheckBox has been extracted from react-native core and will be removed in a future release. It can now be installed and imported from '@react-native-community/checkbox' instead of 'react-native'. See https://github.com/react-native-community/react-native-checkbox"),r(d[4])},get DatePickerIOS(){return r(d[3])('DatePickerIOS-merged',"DatePickerIOS has been merged with DatePickerAndroid and will be removed in a future release. It can now be installed and imported from '@react-native-community/datetimepicker' instead of 'react-native'. See https://github.com/react-native-community/datetimepicker"),r(d[5])},get DrawerLayoutAndroid(){return r(d[6])},get FlatList(){return r(d[7])},get Image(){return r(d[8])},get ImageBackground(){return r(d[9])},get InputAccessoryView(){return r(d[10])},get KeyboardAvoidingView(){return r(d[11])},get MaskedViewIOS(){return r(d[3])('maskedviewios-moved',"MaskedViewIOS has been extracted from react-native core and will be removed in a future release. It can now be installed and imported from '@react-native-community/masked-view' instead of 'react-native'. See https://github.com/react-native-community/react-native-masked-view"),r(d[12])},get Modal(){return r(d[13])},get Picker(){return r(d[3])('picker-moved',"Picker has been extracted from react-native core and will be removed in a future release. It can now be installed and imported from '@react-native-community/picker' instead of 'react-native'. See https://github.com/react-native-community/react-native-picker"),r(d[14])},get PickerIOS(){return r(d[3])('pickerios-moved',"PickerIOS has been extracted from react-native core and will be removed in a future release. It can now be installed and imported from '@react-native-community/picker' instead of 'react-native'. See https://github.com/react-native-community/react-native-picker"),r(d[15])},get ProgressBarAndroid(){return r(d[3])('progress-bar-android-moved',"ProgressBarAndroid has been extracted from react-native core and will be removed in a future release. It can now be installed and imported from '@react-native-community/progress-bar-android' instead of 'react-native'. 
See https://github.com/react-native-community/progress-bar-android"),r(d[16])},get ProgressViewIOS(){return r(d[3])('progress-view-ios-moved',"ProgressViewIOS has been extracted from react-native core and will be removed in a future release. It can now be installed and imported from '@react-native-community/progress-view' instead of 'react-native'. See https://github.com/react-native-community/progress-view"),r(d[17])},get SafeAreaView(){return r(d[18])},get ScrollView(){return r(d[19])},get SectionList(){return r(d[20])},get SegmentedControlIOS(){return r(d[3])('segmented-control-ios-moved',"SegmentedControlIOS has been extracted from react-native core and will be removed in a future release. It can now be installed and imported from '@react-native-community/segmented-control' instead of 'react-native'. See https://github.com/react-native-community/segmented-control"),r(d[21])},get Slider(){return r(d[3])('slider-moved',"Slider has been extracted from react-native core and will be removed in a future release. It can now be installed and imported from '@react-native-community/slider' instead of 'react-native'. See https://github.com/react-native-community/react-native-slider"),r(d[22])},get Switch(){return r(d[23])},get RefreshControl(){return r(d[24])},get StatusBar(){return r(d[25])},get Text(){return r(d[26])},get TextInput(){return r(d[27])},get Touchable(){return r(d[28])},get TouchableHighlight(){return r(d[29])},get TouchableNativeFeedback(){return r(d[30])},get TouchableOpacity(){return r(d[31])},get TouchableWithoutFeedback(){return r(d[32])},get View(){return r(d[33])},get VirtualizedList(){return r(d[34])},get VirtualizedSectionList(){return r(d[35])},get ActionSheetIOS(){return r(d[36])},get Alert(){return r(d[37])},get Animated(){return r(d[38])},get Appearance(){return r(d[39])},get AppRegistry(){return r(d[40])},get AppState(){return r(d[41])},get AsyncStorage(){return r(d[3])('async-storage-moved',"AsyncStorage has been extracted from react-native core and will be removed in a future release. It can now be installed and imported from '@react-native-community/async-storage' instead of 'react-native'. See https://github.com/react-native-community/async-storage"),r(d[42])},get BackHandler(){return r(d[43])},get Clipboard(){return r(d[3])('clipboard-moved',"Clipboard has been extracted from react-native core and will be removed in a future release. It can now be installed and imported from '@react-native-community/clipboard' instead of 'react-native'. See https://github.com/react-native-community/clipboard"),r(d[44])},get DatePickerAndroid(){return r(d[3])('DatePickerAndroid-merged',"DatePickerAndroid has been merged with DatePickerIOS and will be removed in a future release. It can now be installed and imported from '@react-native-community/datetimepicker' instead of 'react-native'. See https://github.com/react-native-community/datetimepicker"),r(d[45])},get DeviceInfo(){return r(d[46])},get DevSettings(){return r(d[47])},get Dimensions(){return r(d[48])},get Easing(){return r(d[49])},get findNodeHandle(){return r(d[50]).findNodeHandle},get I18nManager(){return r(d[51])},get ImagePickerIOS(){return r(d[3])('imagePickerIOS-moved',"ImagePickerIOS has been extracted from react-native core and will be removed in a future release. Please upgrade to use either '@react-native-community/react-native-image-picker' or 'expo-image-picker'. If you cannot upgrade to a different library, please install the deprecated '@react-native-community/image-picker-ios' package. 
See https://github.com/react-native-community/react-native-image-picker-ios"),r(d[52])},get InteractionManager(){return r(d[53])},get Keyboard(){return r(d[54])},get LayoutAnimation(){return r(d[55])},get Linking(){return r(d[56])},get NativeDialogManagerAndroid(){return r(d[57]).default},get NativeEventEmitter(){return r(d[58])},get Networking(){return r(d[59])},get PanResponder(){return r(d[60])},get PermissionsAndroid(){return r(d[61])},get PixelRatio(){return r(d[62])},get PushNotificationIOS(){return r(d[3])('pushNotificationIOS-moved',"PushNotificationIOS has been extracted from react-native core and will be removed in a future release. It can now be installed and imported from '@react-native-community/push-notification-ios' instead of 'react-native'. See https://github.com/react-native-community/push-notification-ios"),r(d[63])},get Settings(){return r(d[64])},get Share(){return r(d[65])},get StatusBarIOS(){return r(d[3])('StatusBarIOS-merged','StatusBarIOS has been merged with StatusBar and will be removed in a future release. Use StatusBar for mutating the status bar'),r(d[66])},get StyleSheet(){return r(d[67])},get Systrace(){return r(d[68])},get ToastAndroid(){return r(d[69])},get TurboModuleRegistry(){return r(d[70])},get TVEventHandler(){return r(d[71])},get UIManager(){return r(d[72])},get unstable_batchedUpdates(){return r(d[50]).unstable_batchedUpdates},get useColorScheme(){return r(d[73]).default},get useWindowDimensions(){return r(d[74]).default},get UTFSequence(){return r(d[75])},get Vibration(){return r(d[76])},get YellowBox(){return r(d[77])},get DeviceEventEmitter(){return r(d[78])},get NativeAppEventEmitter(){return r(d[79])},get NativeModules(){return r(d[80])},get Platform(){return r(d[81])},get processColor(){return r(d[82])},get requireNativeComponent(){return r(d[83])},get unstable_RootTagContext(){return r(d[84])},get unstable_enableLogBox(){return r(d[77]).__unstable_enableLogBox},get ColorPropType(){return r(d[85])},get EdgeInsetsPropType(){return r(d[86])},get PointPropType(){return r(d[87])},get ViewPropTypes(){return r(d[88])}}},21,[22,55,192,290,291,293,295,236,267,296,297,299,300,302,307,308,310,311,313,237,277,315,317,319,258,322,280,325,282,330,331,193,326,128,257,278,332,170,201,334,336,351,354,341,356,358,359,360,123,226,131,305,362,209,251,254,364,171,160,154,366,368,122,370,372,374,376,191,35,377,24,199,87,378,381,79,382,384,48,181,26,82,114,58,343,105,274,388,328]); -__d(function(g,r,i,a,m,e,d){'use strict';var n=r(d[0])(r(d[1])),t={announcementFinished:'announcementFinished',boldTextChanged:'boldTextChanged',grayscaleChanged:'grayscaleChanged',invertColorsChanged:'invertColorsChanged',reduceMotionChanged:'reduceMotionChanged',reduceTransparencyChanged:'reduceTransparencyChanged',screenReaderChanged:'screenReaderChanged'},c=new Map,u={isBoldTextEnabled:function(){return new(r(d[2]))(function(t,c){n.default?n.default.getCurrentBoldTextState(t,c):c(c)})},isGrayscaleEnabled:function(){return new(r(d[2]))(function(t,c){n.default?n.default.getCurrentGrayscaleState(t,c):c(c)})},isInvertColorsEnabled:function(){return new(r(d[2]))(function(t,c){n.default?n.default.getCurrentInvertColorsState(t,c):c(c)})},isReduceMotionEnabled:function(){return new(r(d[2]))(function(t,c){n.default?n.default.getCurrentReduceMotionState(t,c):c(c)})},isReduceTransparencyEnabled:function(){return new(r(d[2]))(function(t,c){n.default?n.default.getCurrentReduceTransparencyState(t,c):c(c)})},isScreenReaderEnabled:function(){return 
new(r(d[2]))(function(t,c){n.default?n.default.getCurrentVoiceOverState(t,c):c(c)})},get fetch(){return console.warn('AccessibilityInfo.fetch is deprecated, call Accessibility.isScreenReaderEnabled instead'),this.isScreenReaderEnabled},addEventListener:function(n,o){var s;return'change'===n?s=r(d[3]).addListener(t.screenReaderChanged,o):t[n]&&(s=r(d[3]).addListener(n,o)),c.set(o,s),{remove:u.removeEventListener.bind(null,n,o)}},setAccessibilityFocus:function(t){n.default&&n.default.setAccessibilityFocus(t)},announceForAccessibility:function(t){n.default&&n.default.announceForAccessibility(t)},removeEventListener:function(n,t){var u=c.get(t);u&&(u.remove(),c.delete(t))}};m.exports=u},22,[1,23,43,48]); -__d(function(g,r,i,a,m,e,d){'use strict';Object.defineProperty(e,"__esModule",{value:!0}),e.default=void 0;var t=r(d[0])(r(d[1])).get('AccessibilityManager');e.default=t},23,[13,24]); -__d(function(g,r,i,a,m,e,d){'use strict';Object.defineProperty(e,"__esModule",{value:!0}),e.get=u,e.getEnforcing=function(t){var l=u(t);return(0,n.default)(null!=l,"TurboModuleRegistry.getEnforcing(...): '"+t+"' could not be found. Verify that a module by this name is registered in the native binary."),l};var n=r(d[0])(r(d[1])),t=g.__turboModuleProxy;function u(n){if(!g.RN$Bridgeless){var u=r(d[2])[n];if(null!=u)return u}return null!=t?t(n):null}},24,[1,25,26]); -__d(function(g,r,i,a,m,e,d){'use strict';m.exports=function(n,o,t,f,s,u,c,l){if(!n){var v;if(void 0===o)v=new Error("Minified exception occurred; use the non-minified dev environment for the full error message and additional helpful warnings.");else{var p=[t,f,s,u,c,l],h=0;(v=new Error(o.replace(/%s/g,function(){return p[h++]}))).name='Invariant Violation'}throw v.framesToPop=1,v}}},25,[]); -__d(function(g,r,i,a,m,e,d){'use strict';function n(n,t){if(!n)return null;var l=r(d[0])(n,5),f=l[0],s=l[1],c=l[2],v=l[3],h=l[4];if(r(d[1])(!f.startsWith('RCT')&&!f.startsWith('RK'),"Module name prefixes should've been stripped by the native side but wasn't for "+f),!s&&!c)return{name:f};var y={};return c&&c.forEach(function(n,l){var f=v&&u(v,l),s=h&&u(h,l);r(d[1])(!f||!s,'Cannot have a method that is both async and a sync hook');var c=f?'promise':s?'sync':'async';y[n]=o(t,l,c)}),r(d[2])(y,s),null==y.getConstants?y.getConstants=function(){return s||Object.freeze({})}:console.warn("Unable to define method 'getConstants()' on NativeModule '"+f+"'. NativeModule '"+f+"' already has a constant or method called 'getConstants'. 
Please remove it."),{name:f,module:y}}function t(t,o){r(d[1])(g.nativeRequireModuleConfig,"Can't lazily create module without nativeRequireModuleConfig");var u=n(g.nativeRequireModuleConfig(t),o);return u&&u.module}function o(n,t,o){var u=null;return(u='promise'===o?function(){for(var o=arguments.length,u=new Array(o),f=0;f0?l[l.length-1]:null,c=l.length>1?l[l.length-2]:null,v='function'==typeof s,h='function'==typeof c;h&&r(d[1])(v,'Cannot have a non-function arg after a function arg.');var y=v?s:null,C=h?c:null,M=v+h;if(l=l.slice(0,l.length-M),'sync'===o)return r(d[3]).callNativeSyncHook(n,t,l,C,y);r(d[3]).enqueueNativeCall(n,t,l,C,y)}).type=o,u}function u(n,t){return-1!==n.indexOf(t)}function l(n,t){return r(d[2])(t,n||{})}g.__fbGenNativeModule=n;var f={};if(g.nativeModuleProxy)f=g.nativeModuleProxy;else if(!g.nativeExtensions){var s=g.__fbBatchedBridgeConfig;r(d[1])(s,'__fbBatchedBridgeConfig is not set, cannot invoke native modules');var c=r(d[4]);(s.remoteModuleConfig||[]).forEach(function(o,u){var l=n(o,u);l&&(l.module?f[l.name]=l.module:c(f,l.name,{get:function(){return t(l.name,u)}}))})}m.exports=f},26,[27,25,17,33,42]); -__d(function(g,r,i,a,m,e,d){m.exports=function(n,t){return r(d[0])(n)||r(d[1])(n,t)||r(d[2])(n,t)||r(d[3])()}},27,[28,29,30,32]); -__d(function(g,r,i,a,m,e,d){m.exports=function(n){if(Array.isArray(n))return n}},28,[]); -__d(function(g,r,i,a,m,e,d){m.exports=function(t,n){if("undefined"!=typeof Symbol&&Symbol.iterator in Object(t)){var o=[],l=!0,f=!1,u=void 0;try{for(var y,c=t[Symbol.iterator]();!(l=(y=c.next()).done)&&(o.push(y.value),!n||o.length!==n);l=!0);}catch(t){f=!0,u=t}finally{try{l||null==c.return||c.return()}finally{if(f)throw u}}return o}}},29,[]); -__d(function(g,r,i,a,m,e,d){m.exports=function(t,n){if(t){if("string"==typeof t)return r(d[0])(t,n);var o=Object.prototype.toString.call(t).slice(8,-1);return"Object"===o&&t.constructor&&(o=t.constructor.name),"Map"===o||"Set"===o?Array.from(t):"Arguments"===o||/^(?:Ui|I)nt(?:8|16|32)(?:Clamped)?Array$/.test(o)?r(d[0])(t,n):void 0}}},30,[31]); -__d(function(g,r,i,a,m,e,d){m.exports=function(n,t){(null==t||t>n.length)&&(t=n.length);for(var l=0,o=new Array(t);l=5){var o=this._queue;this._queue=[[],[],[],this._callID],this._lastFlush=h,g.nativeFlushQueueImmediate(o)}r(d[2]).counterEvent('pending_js_to_native_queue',this._queue[0].length),this.__spy&&this.__spy({type:1,module:t+'',method:l,args:u})}},{key:"createDebugLookup",value:function(t,l,u){}},{key:"setImmediatesCallback",value:function(t){this._immediatesCallback=t}},{key:"__guard",value:function(t){if(this.__shouldPauseOnThrow())t();else try{t()}catch(t){r(d[3]).reportFatalError(t)}}},{key:"__shouldPauseOnThrow",value:function(){return'undefined'!=typeof DebuggerInternal&&!0===DebuggerInternal.shouldPauseOnThrow}},{key:"__callImmediates",value:function(){r(d[2]).beginEvent('JSTimers.callImmediates()'),null!=this._immediatesCallback&&this._immediatesCallback(),r(d[2]).endEvent()}},{key:"__callFunction",value:function(t,l,u){this._lastFlush=Date.now(),this._eventLoopStartTime=this._lastFlush,this.__spy?r(d[2]).beginEvent(t+"."+l+"("+r(d[4])(u)+")"):r(d[2]).beginEvent(t+"."+l+"(...)"),this.__spy&&this.__spy({type:0,module:t,method:l,args:u});var s=this.getCallableModule(t);r(d[5])(!!s,'Module %s is not a registered callable module (calling %s)',t,l),r(d[5])(!!s[l],'Method %s does not exist on module %s',l,t);var n=s[l].apply(s,u);return 
r(d[2]).endEvent(),n}},{key:"__invokeCallback",value:function(t,l){this._lastFlush=Date.now(),this._eventLoopStartTime=this._lastFlush;var u=t>>>1,s=1&t?this._successCallbacks.get(u):this._failureCallbacks.get(u);s&&(this._successCallbacks.delete(u),this._failureCallbacks.delete(u),s.apply(void 0,r(d[6])(l)))}}],[{key:"spy",value:function(l){t.prototype.__spy=!0===l?function(t){console.log((0===t.type?'N->JS':'JS->N')+" : "+(t.module?t.module+'.':'')+t.method+"("+JSON.stringify(t.args)+")")}:!1===l?null:l}}]),t})();m.exports=t},34,[5,6,35,36,37,25,38]); -__d(function(g,r,i,a,m,e,d){'use strict';var n=!1,t=0,c={installReactHook:function(){!0},setEnabled:function(t){n!==t&&(n=t)},isEnabled:function(){return n},beginEvent:function(t,c){n&&(t='function'==typeof t?t():t,g.nativeTraceBeginSection(131072,t,c))},endEvent:function(){n&&g.nativeTraceEndSection(131072)},beginAsyncEvent:function(c){var o=t;return n&&(t++,c='function'==typeof c?c():c,g.nativeTraceBeginAsyncSection(131072,c,o)),o},endAsyncEvent:function(t,c){n&&(t='function'==typeof t?t():t,g.nativeTraceEndAsyncSection(131072,t,c))},counterEvent:function(t,c){n&&(t='function'==typeof t?t():t,g.nativeTraceCounter&&g.nativeTraceCounter(131072,t,c))}};m.exports=c},35,[]); -__d(function(g,r,i,a,m,e,d){m.exports=g.ErrorUtils},36,[]); -__d(function(g,r,i,a,m,e,d){'use strict';m.exports=function(t){var n,f=typeof t;if(void 0===t)n='undefined';else if(null===t)n='null';else if('string'===f)n='"'+t+'"';else if('function'===f)try{n=t.toString()}catch(t){n='[function unknown]'}else if(t instanceof Error)n=t.name+': '+t.message;else try{n=JSON.stringify(t)}catch(f){if('function'==typeof t.toString)try{n=t.toString()}catch(t){}}return n||'["'+f+'" failed to stringify]'}},37,[]); -__d(function(g,r,i,a,m,e,d){m.exports=function(n){return r(d[0])(n)||r(d[1])(n)||r(d[2])(n)||r(d[3])()}},38,[39,40,30,41]); -__d(function(g,r,i,a,m,e,d){m.exports=function(n){if(Array.isArray(n))return r(d[0])(n)}},39,[31]); -__d(function(g,r,i,a,m,e,d){m.exports=function(n){if("undefined"!=typeof Symbol&&Symbol.iterator in Object(n))return Array.from(n)}},40,[]); -__d(function(g,r,i,a,m,e,d){m.exports=function(){throw new TypeError("Invalid attempt to spread non-iterable instance.\nIn order to be iterable, non-array objects must have a [Symbol.iterator]() method.")}},41,[]); -__d(function(g,r,i,a,m,e,d){'use strict';m.exports=function(t,n,u){var b,c=u.get,o=!1!==u.enumerable,f=!1!==u.writable,l=!1;function s(u){b=u,l=!0,Object.defineProperty(t,n,{value:u,configurable:!0,enumerable:o,writable:f})}Object.defineProperty(t,n,{get:function(){return l||(l=!0,s(c())),b},set:s,configurable:!0,enumerable:o})}},42,[]); -__d(function(g,r,i,a,m,e,d){'use strict';r(d[0]),r(d[1]),m.exports=r(d[2])},43,[44,46,47]); -__d(function(g,r,i,a,m,e,d){'use strict';m.exports=r(d[0]),r(d[0]).prototype.done=function(t,n){(arguments.length?this.then.apply(this,arguments):this).then(null,function(t){setTimeout(function(){throw t},0)})}},44,[45]); -__d(function(g,r,i,a,m,e,d){'use strict';function n(){}var t=null,o={};function u(n){try{return n.then}catch(n){return t=n,o}}function f(n,u){try{return n(u)}catch(n){return t=n,o}}function c(n,u,f){try{n(u,f)}catch(n){return t=n,o}}function _(t){if('object'!=typeof this)throw new TypeError('Promises must be constructed via new');if('function'!=typeof t)throw new TypeError('Promise constructor\'s argument is not a function');this._40=0,this._65=0,this._55=null,this._72=null,t!==n&&b(t,this)}function s(t,o,u){return new t.constructor(function(f,c){var s=new 
_(n);s.then(f,c),l(t,new w(o,u,s))})}function l(n,t){for(;3===n._65;)n=n._55;if(_._37&&_._37(n),0===n._65)return 0===n._40?(n._40=1,void(n._72=t)):1===n._40?(n._40=2,void(n._72=[n._72,t])):void n._72.push(t);h(n,t)}function h(n,u){setImmediate(function(){var c=1===n._65?u.onFulfilled:u.onRejected;if(null!==c){var _=f(c,n._55);_===o?v(u.promise,t):p(u.promise,_)}else 1===n._65?p(u.promise,n._55):v(u.promise,n._55)})}function p(n,f){if(f===n)return v(n,new TypeError('A promise cannot be resolved with itself.'));if(f&&('object'==typeof f||'function'==typeof f)){var c=u(f);if(c===o)return v(n,t);if(c===n.then&&f instanceof _)return n._65=3,n._55=f,void y(n);if('function'==typeof c)return void b(c.bind(f),n)}n._65=1,n._55=f,y(n)}function v(n,t){n._65=2,n._55=t,_._87&&_._87(n,t),y(n)}function y(n){if(1===n._40&&(l(n,n._72),n._72=null),2===n._40){for(var t=0;t0&&(x=new Set,N(-1))},e.setSelectedLog=N,e.clearWarnings=function(){var t=Array.from(x).filter(function(t){return'warn'!==t.level});t.length!==x.size&&(x=new Set(t),N(-1),C())},e.clearErrors=function(){var t=Array.from(x).filter(function(t){return'error'!==t.level&&'fatal'!==t.level});t.length!==x.size&&(x=new Set(t),N(-1))},e.dismiss=B,e.setWarningFilter=function(t){D=t},e.setAppInfo=function(t){E=t},e.getAppInfo=function(){return null!=E?E():null},e.checkWarningFilter=function(t){return D(t)},e.addIgnorePatterns=function(t){var n=t.filter(function(t){if(t instanceof RegExp){for(var n,o=p(w.entries());!(n=o()).done;){var u=n.value;if(u instanceof RegExp&&u.toString()===t.toString())return!1}return!0}return!w.has(t)});if(0===n.length)return;for(var o,u=p(n);!(o=u()).done;){var l=o.value;w.add(l),x=new Set(Array.from(x).filter(function(t){return!z(t.message.content)}))}C()},e.setDisabled=function(t){if(t===L)return;L=t,C()},e.isDisabled=function(){return L},e.observe=P,e.withSubscription=function(c){return(function(f){(0,o.default)(b,f);var v,p,h=(v=b,p=y(),function(){var t,n=(0,l.default)(v);if(p){var o=(0,l.default)(this).constructor;t=Reflect.construct(n,arguments,o)}else t=n.apply(this,arguments);return(0,u.default)(this,t)});function b(){var n;(0,t.default)(this,b);for(var o=arguments.length,u=new Array(o),l=0;l=l.length-1&&N(o-1),B(l[o]))},n._handleMinimize=function(){N(-1)},n._handleSetSelectedLog=function(t){N(t)},n}return(0,n.default)(b,[{key:"componentDidCatch",value:function(t,n){R(t,n.componentStack)}},{key:"render",value:function(){return this.state.hasError?null:s.createElement(c,{logs:Array.from(this.state.logs),isDisabled:this.state.isDisabled,selectedLogIndex:this.state.selectedLogIndex})}},{key:"componentDidMount",value:function(){var t=this;this._subscription=P(function(n){t.setState(n)})}},{key:"componentWillUnmount",value:function(){null!=this._subscription&&this._subscription.unsubscribe()}}],[{key:"getDerivedStateFromError",value:function(){return{hasError:!0}}}]),b})(s.Component)};var t=r(d[0])(r(d[1])),n=r(d[0])(r(d[2])),o=r(d[0])(r(d[3])),u=r(d[0])(r(d[4])),l=r(d[0])(r(d[5])),s=r(d[6])(r(d[7])),c=r(d[0])(r(d[8])),f=r(d[0])(r(d[9])),v=r(d[0])(r(d[10]));function y(){if("undefined"==typeof Reflect||!Reflect.construct)return!1;if(Reflect.construct.sham)return!1;if("function"==typeof Proxy)return!0;try{return Date.prototype.toString.call(Reflect.construct(Date,[],function(){})),!0}catch(t){return!1}}function p(t,n){var o;if("undefined"==typeof Symbol||null==t[Symbol.iterator]){if(Array.isArray(t)||(o=h(t))||n&&t&&"number"==typeof t.length){o&&(t=o);var u=0;return function(){return 
[Elided: minified React Native (Metro) JavaScript bundle, an uninterrupted run of deleted __d(...) module-definition lines, each registering one generated module under a numeric ID. The removed modules include error-log handling and stack symbolication, a WHATWG fetch polyfill built on XMLHttpRequest, stack-trace parsers for several JS engines, color-string and transform-matrix style utilities, Dimensions/PixelRatio and packager asset resolution, the React Native renderer (the React reconciler), the JS timer queue, and XMLHttpRequest/Blob networking modules. The generated code is not reproducible from this extraction and is not repeated line by line; sketches of the bundle's module convention and of its fetch-polyfill pattern follow.]
Event object, but got",n),o}function l(t){null==t.passiveListener?t.event.cancelable&&(t.canceled=!0,"function"==typeof t.event.preventDefault&&t.event.preventDefault()):"undefined"!=typeof console&&"function"==typeof console.error&&console.error("Unable to preventDefault inside passive event listener invocation.",t.passiveListener)}function u(n,o){t.set(this,{eventTarget:n,event:o,eventPhase:2,currentTarget:n,canceled:!1,stopped:!1,immediateStopped:!1,passiveListener:null,timeStamp:o.timeStamp||Date.now()}),Object.defineProperty(this,"isTrusted",{value:!1,enumerable:!0});for(var l=Object.keys(o),u=0;u0){for(var t=new Array(arguments.length),n=0;n0?C-4:C;for(u=0;u>16&255,s[v++]=h>>8&255,s[v++]=255&h;2===y&&(h=n[t.charCodeAt(u)]<<2|n[t.charCodeAt(u+1)]>>4,s[v++]=255&h);1===y&&(h=n[t.charCodeAt(u)]<<10|n[t.charCodeAt(u+1)]<<4|n[t.charCodeAt(u+2)]>>2,s[v++]=h>>8&255,s[v++]=255&h);return s},e.fromByteArray=function(n){for(var o,h=n.length,u=h%3,c=[],f=0,A=h-u;fA?A:f+16383));1===u?(o=n[h-1],c.push(t[o>>2]+t[o<<4&63]+'==')):2===u&&(o=(n[h-2]<<8)+n[h-1],c.push(t[o>>10]+t[o>>4&63]+t[o<<2&63]+'='));return c.join('')};for(var t=[],n=[],o='undefined'!=typeof Uint8Array?Uint8Array:Array,h='ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/',u=0,c=h.length;u0)throw new Error('Invalid string. Length must be a multiple of 4');var o=t.indexOf('=');return-1===o&&(o=n),[o,o===n?0:4-o%4]}function A(t,n,o){return 3*(n+o)/4-o}function C(n,o,h){for(var u,c,f=[],A=o;A>18&63]+t[c>>12&63]+t[c>>6&63]+t[63&c]);return f.join('')}n['-'.charCodeAt(0)]=62,n['_'.charCodeAt(0)]=63},159,[]); -__d(function(g,r,i,a,m,e,d){'use strict';function t(){if("undefined"==typeof Reflect||!Reflect.construct)return!1;if(Reflect.construct.sham)return!1;if("function"==typeof Proxy)return!0;try{return Date.prototype.toString.call(Reflect.construct(Date,[],function(){})),!0}catch(t){return!1}}var n=(function(n){r(d[2])(l,n);var o,s,u=(o=l,s=t(),function(){var t,n=r(d[0])(o);if(s){var u=r(d[0])(this).constructor;t=Reflect.construct(n,arguments,u)}else t=n.apply(this,arguments);return r(d[1])(this,t)});function l(t){var n;return r(d[3])(this,l),n=u.call(this,r(d[4]).sharedSubscriber),r(d[5])(t,'Native module cannot be null.'),n._nativeModule=t,n}return r(d[6])(l,[{key:"addListener",value:function(t,n,o){return null!=this._nativeModule&&this._nativeModule.addListener(t),r(d[7])(r(d[0])(l.prototype),"addListener",this).call(this,t,n,o)}},{key:"removeAllListeners",value:function(t){r(d[5])(t,'eventType argument is required.');var n=this.listeners(t).length;null!=this._nativeModule&&this._nativeModule.removeListeners(n),r(d[7])(r(d[0])(l.prototype),"removeAllListeners",this).call(this,t)}},{key:"removeSubscription",value:function(t){null!=this._nativeModule&&this._nativeModule.removeListeners(1),r(d[7])(r(d[0])(l.prototype),"removeSubscription",this).call(this,t)}}]),l})(r(d[8]));m.exports=n},160,[12,9,7,5,48,25,6,50,52]); -__d(function(g,r,i,a,m,e,d){'use strict';var t=r(d[0])(r(d[1])),s=r(d[0])(r(d[2])),n=r(d[0])(r(d[3])),o=r(d[0])(r(d[4])),c=r(d[0])(r(d[5])),u=r(d[0])(r(d[6])),l=r(d[0])(r(d[7]));function f(){if("undefined"==typeof Reflect||!Reflect.construct)return!1;if(Reflect.construct.sham)return!1;if("function"==typeof Proxy)return!0;try{return Date.prototype.toString.call(Reflect.construct(Date,[],function(){})),!0}catch(t){return!1}}var h=0,y=1,b=2,p=3,v=0,_=(function(_){(0,o.default)(I,_);var E,k,S=(E=I,k=f(),function(){var t,s=(0,u.default)(E);if(k){var 
n=(0,u.default)(this).constructor;t=Reflect.construct(s,arguments,n)}else t=s.apply(this,arguments);return(0,c.default)(this,t)});function I(n,o,c){var u;(0,s.default)(this,I),(u=S.call(this)).CONNECTING=h,u.OPEN=y,u.CLOSING=b,u.CLOSED=p,u.readyState=h,'string'==typeof o&&(o=[o]);var f=c||{},_=f.headers,E=void 0===_?{}:_,k=(0,t.default)(f,["headers"]);return k&&'string'==typeof k.origin&&(console.warn('Specifying `origin` as a WebSocket connection option is deprecated. Include it under `headers` instead.'),E.origin=k.origin,delete k.origin),Object.keys(k).length>0&&console.warn('Unrecognized WebSocket connection option(s) `'+Object.keys(k).join('`, `')+"`. Did you mean to put these under `headers`?"),Array.isArray(o)||(o=null),u._eventEmitter=new(r(d[8]))(l.default),u._socketId=v++,u._registerEvents(),l.default.connect(n,o,{headers:E},u._socketId),u}return(0,n.default)(I,[{key:"close",value:function(t,s){this.readyState!==this.CLOSING&&this.readyState!==this.CLOSED&&(this.readyState=this.CLOSING,this._close(t,s))}},{key:"send",value:function(t){if(this.readyState===this.CONNECTING)throw new Error('INVALID_STATE_ERR');if(t instanceof r(d[9]))return r(d[10])(r(d[11]).isAvailable,'Native module BlobModule is required for blob support'),void r(d[11]).sendOverSocket(t,this._socketId);if('string'!=typeof t){if(!(t instanceof ArrayBuffer||ArrayBuffer.isView(t)))throw new Error('Unsupported data type');l.default.sendBinary(r(d[12])(t),this._socketId)}else l.default.send(t,this._socketId)}},{key:"ping",value:function(){if(this.readyState===this.CONNECTING)throw new Error('INVALID_STATE_ERR');l.default.ping(this._socketId)}},{key:"_close",value:function(t,s){var n='number'==typeof t?t:1e3,o='string'==typeof s?s:'';l.default.close(n,o,this._socketId),r(d[11]).isAvailable&&'blob'===this._binaryType&&r(d[11]).removeWebSocketHandler(this._socketId)}},{key:"_unregisterEvents",value:function(){this._subscriptions.forEach(function(t){return t.remove()}),this._subscriptions=[]}},{key:"_registerEvents",value:function(){var t=this;this._subscriptions=[this._eventEmitter.addListener('websocketMessage',function(s){if(s.id===t._socketId){var n=s.data;switch(s.type){case'binary':n=r(d[13]).toByteArray(s.data).buffer;break;case'blob':n=r(d[11]).createFromOptions(s.data)}t.dispatchEvent(new(r(d[14]))('message',{data:n}))}}),this._eventEmitter.addListener('websocketOpen',function(s){s.id===t._socketId&&(t.readyState=t.OPEN,t.protocol=s.protocol,t.dispatchEvent(new(r(d[14]))('open')))}),this._eventEmitter.addListener('websocketClosed',function(s){s.id===t._socketId&&(t.readyState=t.CLOSED,t.dispatchEvent(new(r(d[14]))('close',{code:s.code,reason:s.reason})),t._unregisterEvents(),t.close())}),this._eventEmitter.addListener('websocketFailed',function(s){s.id===t._socketId&&(t.readyState=t.CLOSED,t.dispatchEvent(new(r(d[14]))('error',{message:s.message})),t.dispatchEvent(new(r(d[14]))('close',{message:s.message})),t._unregisterEvents(),t.close())})]}},{key:"binaryType",get:function(){return this._binaryType},set:function(t){if('blob'!==t&&'arraybuffer'!==t)throw new Error("binaryType must be either 'blob' or 'arraybuffer'");'blob'!==this._binaryType&&'blob'!==t||(r(d[10])(r(d[11]).isAvailable,'Native module BlobModule is required for blob support'),'blob'===t?r(d[11]).addWebSocketHandler(this._socketId):r(d[11]).removeWebSocketHandler(this._socketId)),this._binaryType=t}}]),I})(r(d[15]).apply(void 
0,['close','error','message','open']));_.CONNECTING=h,_.OPEN=y,_.CLOSING=b,_.CLOSED=p,m.exports=_},161,[1,126,5,6,7,9,12,162,160,151,25,149,158,159,163,153]); -__d(function(g,r,i,a,m,e,d){'use strict';Object.defineProperty(e,"__esModule",{value:!0}),e.default=void 0;var t=r(d[0])(r(d[1])).getEnforcing('WebSocketModule');e.default=t},162,[13,24]); -__d(function(g,r,i,a,m,e,d){'use strict';m.exports=function t(s,n){r(d[0])(this,t),this.type=s.toString(),r(d[1])(this,n)}},163,[5,17]); -__d(function(g,r,i,a,m,e,d){'use strict';function t(){if("undefined"==typeof Reflect||!Reflect.construct)return!1;if(Reflect.construct.sham)return!1;if("function"==typeof Proxy)return!0;try{return Date.prototype.toString.call(Reflect.construct(Date,[],function(){})),!0}catch(t){return!1}}var n=(function(n){r(d[2])(o,n);var u,c,s=(u=o,c=t(),function(){var t,n=r(d[0])(u);if(c){var s=r(d[0])(this).constructor;t=Reflect.construct(n,arguments,s)}else t=n.apply(this,arguments);return r(d[1])(this,t)});function o(t,n,u){var c;return r(d[3])(this,o),r(d[4])(null!=t&&null!=n,'Failed to construct `File`: Must pass both `parts` and `name` arguments.'),(c=s.call(this,t,u)).data.name=n,c}return r(d[5])(o,[{key:"name",get:function(){return r(d[4])(null!=this.data.name,'Files must have a name set.'),this.data.name}},{key:"lastModified",get:function(){return this.data.lastModified||0}}]),o})(r(d[6]));m.exports=n},164,[12,9,7,5,25,6,151]); -__d(function(g,r,i,a,m,e,d){'use strict';var t=r(d[0])(r(d[1])),n=r(d[0])(r(d[2])),s=r(d[0])(r(d[3])),o=r(d[0])(r(d[4])),u=r(d[0])(r(d[5])),c=r(d[0])(r(d[6]));function f(){if("undefined"==typeof Reflect||!Reflect.construct)return!1;if(Reflect.construct.sham)return!1;if("function"==typeof Proxy)return!0;try{return Date.prototype.toString.call(Reflect.construct(Date,[],function(){})),!0}catch(t){return!1}}var l=0,h=1,_=2,y=(function(y){(0,s.default)(R,y);var p,v,b=(p=R,v=f(),function(){var t,n=(0,u.default)(p);if(v){var s=(0,u.default)(this).constructor;t=Reflect.construct(n,arguments,s)}else t=n.apply(this,arguments);return(0,o.default)(this,t)});function R(){var n;return(0,t.default)(this,R),(n=b.call(this)).EMPTY=l,n.LOADING=h,n.DONE=_,n._aborted=!1,n._subscriptions=[],n._reset(),n}return(0,n.default)(R,[{key:"_reset",value:function(){this._readyState=l,this._error=null,this._result=null}},{key:"_clearSubscriptions",value:function(){this._subscriptions.forEach(function(t){return t.remove()}),this._subscriptions=[]}},{key:"_setReadyState",value:function(t){this._readyState=t,this.dispatchEvent({type:'readystatechange'}),t===_&&(this._aborted?this.dispatchEvent({type:'abort'}):this._error?this.dispatchEvent({type:'error'}):this.dispatchEvent({type:'load'}),this.dispatchEvent({type:'loadend'}))}},{key:"readAsArrayBuffer",value:function(){throw new Error('FileReader.readAsArrayBuffer is not implemented')}},{key:"readAsDataURL",value:function(t){var n=this;this._aborted=!1,c.default.readAsDataURL(t.data).then(function(t){n._aborted||(n._result=t,n._setReadyState(_))},function(t){n._aborted||(n._error=t,n._setReadyState(_))})}},{key:"readAsText",value:function(t){var n=this,s=arguments.length>1&&void 0!==arguments[1]?arguments[1]:'UTF-8';this._aborted=!1,c.default.readAsText(t.data,s).then(function(t){n._aborted||(n._result=t,n._setReadyState(_))},function(t){n._aborted||(n._error=t,n._setReadyState(_))})}},{key:"abort",value:function(){this._aborted=!0,this._readyState!==l&&this._readyState!==_&&(this._reset(),this._setReadyState(_)),this._reset()}},{key:"readyState",get:function(){return 
this._readyState}},{key:"error",get:function(){return this._error}},{key:"result",get:function(){return this._result}}]),R})(r(d[7]).apply(void 0,['abort','error','load','loadstart','loadend','progress']));y.EMPTY=l,y.LOADING=h,y.DONE=_,m.exports=y},165,[1,5,6,7,9,12,166,153]); -__d(function(g,r,i,a,m,e,d){'use strict';Object.defineProperty(e,"__esModule",{value:!0}),e.default=void 0;var t=r(d[0])(r(d[1])).getEnforcing('FileReaderModule');e.default=t},166,[13,24]); -__d(function(g,r,i,a,m,e,d){'use strict';Object.defineProperty(e,"__esModule",{value:!0}),e.URL=e.URLSearchParams=void 0;var t,n=r(d[0])(r(d[1])),o=r(d[0])(r(d[2])),u=r(d[0])(r(d[3])),s=null;if(u.default&&'string'==typeof u.default.getConstants().BLOB_URI_SCHEME){var f=u.default.getConstants();s=f.BLOB_URI_SCHEME+':','string'==typeof f.BLOB_URI_HOST&&(s+="//"+f.BLOB_URI_HOST+"/")}t=Symbol.iterator;var h=(function(){function u(t){var o=this;(0,n.default)(this,u),this._searchParams=[],'object'==typeof t&&Object.keys(t).forEach(function(n){return o.append(n,t[n])})}return(0,o.default)(u,[{key:"append",value:function(t,n){this._searchParams.push([t,n])}},{key:"delete",value:function(t){throw new Error('not implemented')}},{key:"get",value:function(t){throw new Error('not implemented')}},{key:"getAll",value:function(t){throw new Error('not implemented')}},{key:"has",value:function(t){throw new Error('not implemented')}},{key:"set",value:function(t,n){throw new Error('not implemented')}},{key:"sort",value:function(){throw new Error('not implemented')}},{key:t,value:function(){return this._searchParams[Symbol.iterator]()}},{key:"toString",value:function(){if(0===this._searchParams.length)return'';var t=this._searchParams.length-1;return this._searchParams.reduce(function(n,o,u){return n+o.join('=')+(u===t?'':'&')},'')}}]),u})();function l(t){return/^(?:(?:(?:https?|ftp):)?\/\/)(?:(?:[1-9]\d?|1\d\d|2[01]\d|22[0-3])(?:\.(?:1?\d{1,2}|2[0-4]\d|25[0-5])){2}(?:\.(?:[1-9]\d?|1\d\d|2[0-4]\d|25[0-4]))|(?:(?:[a-z\u00a1-\uffff0-9]-*)*[a-z\u00a1-\uffff0-9]+)(?:\.(?:[a-z\u00a1-\uffff0-9]-*)*[a-z\u00a1-\uffff0-9]+)*(?:\.(?:[a-z\u00a1-\uffff]{2,}))?)(?::\d{2,5})?(?:[/?#]\S*)?$/i.test(t)}e.URLSearchParams=h;var c=(function(){function t(o,u){(0,n.default)(this,t),this._searchParamsInstance=null;var s=null;if(!u||l(o))this._url=o,this._url.endsWith('/')||(this._url+='/');else{if('string'==typeof u){if(!l(s=u))throw new TypeError("Invalid base URL: "+s)}else'object'==typeof u&&(s=u.toString());s.endsWith('/')&&(s=s.slice(0,s.length-1)),o.startsWith('/')||(o="/"+o),s.endsWith(o)&&(o=''),this._url=""+s+o}}return(0,o.default)(t,null,[{key:"createObjectURL",value:function(t){if(null===s)throw new Error('Cannot create URL for blob!');return""+s+t.data.blobId+"?offset="+t.data.offset+"&size="+t.size}},{key:"revokeObjectURL",value:function(t){}}]),(0,o.default)(t,[{key:"toJSON",value:function(){return this.toString()}},{key:"toString",value:function(){if(null===this._searchParamsInstance)return this._url;var t=this._url.indexOf('?')>-1?'&':'?';return this._url+t+this._searchParamsInstance.toString()}},{key:"hash",get:function(){throw new Error('not implemented')}},{key:"host",get:function(){throw new Error('not implemented')}},{key:"hostname",get:function(){throw new Error('not implemented')}},{key:"href",get:function(){return this.toString()}},{key:"origin",get:function(){throw new Error('not implemented')}},{key:"password",get:function(){throw new Error('not implemented')}},{key:"pathname",get:function(){throw new Error('not 
implemented')}},{key:"port",get:function(){throw new Error('not implemented')}},{key:"protocol",get:function(){throw new Error('not implemented')}},{key:"search",get:function(){throw new Error('not implemented')}},{key:"searchParams",get:function(){return null==this._searchParamsInstance&&(this._searchParamsInstance=new h),this._searchParamsInstance}},{key:"username",get:function(){throw new Error('not implemented')}}]),t})();e.URL=c},167,[1,5,6,150]); -__d(function(g,r,i,a,m,e,d){'use strict';function t(){if("undefined"==typeof Reflect||!Reflect.construct)return!1;if(Reflect.construct.sham)return!1;if("function"==typeof Proxy)return!0;try{return Date.prototype.toString.call(Reflect.construct(Date,[],function(){})),!0}catch(t){return!1}}Object.defineProperty(e,'__esModule',{value:!0});var o=(function(o){r(d[2])(f,o);var l,c,u=(l=f,c=t(),function(){var t,o=r(d[0])(l);if(c){var n=r(d[0])(this).constructor;t=Reflect.construct(o,arguments,n)}else t=o.apply(this,arguments);return r(d[1])(this,t)});function f(){throw r(d[3])(this,f),u.call(this),new TypeError("AbortSignal cannot be constructed directly")}return r(d[4])(f,[{key:"aborted",get:function(){var t=n.get(this);if("boolean"!=typeof t)throw new TypeError("Expected 'this' to be an 'AbortSignal' object, but got "+(null===this?"null":typeof this));return t}}]),f})(r(d[5]).EventTarget);r(d[5]).defineEventAttribute(o.prototype,"abort");var n=new WeakMap;Object.defineProperties(o.prototype,{aborted:{enumerable:!0}}),"function"==typeof Symbol&&"symbol"==typeof Symbol.toStringTag&&Object.defineProperty(o.prototype,Symbol.toStringTag,{configurable:!0,value:"AbortSignal"});var l=(function(){function t(){var l;r(d[3])(this,t),c.set(this,(l=Object.create(o.prototype),r(d[5]).EventTarget.call(l),n.set(l,!1),l))}return r(d[4])(t,[{key:"abort",value:function(){var t;t=u(this),!1===n.get(t)&&(n.set(t,!0),t.dispatchEvent({type:"abort"}))}},{key:"signal",get:function(){return u(this)}}]),t})(),c=new WeakMap;function u(t){var o=c.get(t);if(null==o)throw new TypeError("Expected 'this' to be an 'AbortController' object, but got "+(null===t?"null":typeof t));return o}Object.defineProperties(l.prototype,{signal:{enumerable:!0},abort:{enumerable:!0}}),"function"==typeof Symbol&&"symbol"==typeof Symbol.toStringTag&&Object.defineProperty(l.prototype,Symbol.toStringTag,{configurable:!0,value:"AbortController"}),e.AbortController=l,e.AbortSignal=o,e.default=l,m.exports=l,m.exports.AbortController=m.exports.default=l,m.exports.AbortSignal=o},168,[12,9,7,5,6,153]); -__d(function(g,r,i,a,m,e,d){'use strict';g.alert||(g.alert=function(t){r(d[0]).alert('Alert',''+t)})},169,[170]); -__d(function(g,r,i,a,m,e,d){'use strict';var t=r(d[0])(r(d[1])),n=r(d[0])(r(d[2])),o=r(d[0])(r(d[3])),l=r(d[0])(r(d[4])),s=r(d[0])(r(d[5])),u=(function(){function u(){(0,t.default)(this,u)}return(0,n.default)(u,null,[{key:"alert",value:function(t,n,s,c){if('ios'===o.default.OS)u.prompt(t,n,s,'default');else if('android'===o.default.OS){if(!l.default)return;var f=l.default.getConstants(),p={title:t||'',message:n||'',cancelable:!1};c&&c.cancelable&&(p.cancelable=c.cancelable);var v=s?s.slice(0,3):[{text:"OK"}],y=v.pop(),b=v.pop(),h=v.pop();h&&(p.buttonNeutral=h.text||''),b&&(p.buttonNegative=b.text||''),y&&(p.buttonPositive=y.text||"OK");l.default.showAlert(p,function(t){return 
console.warn(t)},function(t,n){t===f.buttonClicked?n===f.buttonNeutral?h.onPress&&h.onPress():n===f.buttonNegative?b.onPress&&b.onPress():n===f.buttonPositive&&y.onPress&&y.onPress():t===f.dismissed&&c&&c.onDismiss&&c.onDismiss()})}}},{key:"prompt",value:function(t,n,l){var u=arguments.length>3&&void 0!==arguments[3]?arguments[3]:'plain-text',c=arguments.length>4?arguments[4]:void 0,f=arguments.length>5?arguments[5]:void 0;if('ios'===o.default.OS){if('function'==typeof u){console.warn("You passed a callback function as the \"type\" argument to Alert.prompt(). React Native is assuming you want to use the deprecated Alert.prompt(title, defaultValue, buttons, callback) signature. The current signature is Alert.prompt(title, message, callbackOrButtons, type, defaultValue, keyboardType) and the old syntax will be removed in a future version.");var p=u;return void s.default.alertWithArgs({title:t||'',type:'plain-text',defaultValue:n||''},function(t,n){p(n)})}var v,y,b=[],h=[];'function'==typeof l?b=[l]:Array.isArray(l)&&l.forEach(function(t,n){if(b[n]=t.onPress,'cancel'===t.style?v=String(n):'destructive'===t.style&&(y=String(n)),t.text||n<(l||[]).length-1){var o={};o[n]=t.text||'',h.push(o)}}),s.default.alertWithArgs({title:t||'',message:n||void 0,buttons:h,type:u||void 0,defaultValue:c,cancelButtonKey:v,destructiveButtonKey:y,keyboardType:f},function(t,n){var o=b[t];o&&o(n)})}}}]),u})();m.exports=u},170,[1,5,6,82,171,172]); -__d(function(g,r,i,a,m,e,d){'use strict';Object.defineProperty(e,"__esModule",{value:!0}),e.default=void 0;var t=r(d[0])(r(d[1])).get('DialogManagerAndroid');e.default=t},171,[13,24]); -__d(function(g,r,i,a,m,e,d){'use strict';var t=r(d[0])(r(d[1]));m.exports={alertWithArgs:function(l,u){null!=t.default&&t.default.alertWithArgs(l,u)}}},172,[1,173]); -__d(function(g,r,i,a,m,e,d){'use strict';Object.defineProperty(e,"__esModule",{value:!0}),e.default=void 0;var t=r(d[0])(r(d[1])).get('AlertManager');e.default=t},173,[13,24]); -__d(function(g,r,i,a,m,e,d){'use strict';var t=g.navigator;void 0===t&&(g.navigator=t={}),r(d[0]).polyfillObjectProperty(t,'product',function(){return'ReactNative'})},174,[139]); -__d(function(g,r,i,a,m,e,d){'use strict';if(!g.RN$Bridgeless){var l=r(d[0]);l.registerLazyCallableModule('Systrace',function(){return r(d[1])}),l.registerLazyCallableModule('JSTimers',function(){return r(d[2])}),l.registerLazyCallableModule('HeapCapture',function(){return r(d[3])}),l.registerLazyCallableModule('SamplingProfiler',function(){return r(d[4])}),l.registerLazyCallableModule('RCTLog',function(){return r(d[5])}),l.registerLazyCallableModule('RCTDeviceEventEmitter',function(){return r(d[6])}),l.registerLazyCallableModule('RCTNativeAppEventEmitter',function(){return r(d[7])}),l.registerLazyCallableModule('GlobalPerformanceLogger',function(){return r(d[8])}),l.registerLazyCallableModule('JSDevSupportModule',function(){return r(d[9])}),l.registerLazyCallableModule('HMRClient',function(){return r(d[10])})}},175,[33,35,142,176,178,180,48,181,182,184,186]); -__d(function(g,r,i,a,m,e,d){'use strict';var t=r(d[0])(r(d[1])),p={captureHeap:function(p){var u=null;try{g.nativeCaptureHeap(p),console.log('HeapCapture.captureHeap succeeded: '+p)}catch(t){console.log('HeapCapture.captureHeap error: '+t.toString()),u=t.toString()}t.default&&t.default.captureComplete(p,u)}};m.exports=p},176,[1,177]); -__d(function(g,r,i,a,m,e,d){'use strict';Object.defineProperty(e,"__esModule",{value:!0}),e.default=void 0;var t=r(d[0])(r(d[1])).get('HeapCapture');e.default=t},177,[13,24]); 
-__d(function(g,r,i,a,m,e,d){'use strict';var o={poke:function(o){var l=null,n=null;try{null===(n=g.pokeSamplingProfiler())?console.log('The JSC Sampling Profiler has started'):console.log('The JSC Sampling Profiler has stopped')}catch(o){console.log('Error occurred when restarting Sampling Profiler: '+o.toString()),l=o.toString()}var t=r(d[0]).default;t&&t.operationComplete(o,n,l)}};m.exports=o},178,[179]); -__d(function(g,r,i,a,m,e,d){'use strict';Object.defineProperty(e,"__esModule",{value:!0}),e.default=void 0;var t=r(d[0])(r(d[1])).get('JSCSamplingProfiler');e.default=t},179,[13,24]); -__d(function(g,r,i,a,m,e,d){'use strict';var o={log:'log',info:'info',warn:'warn',error:'error',fatal:'error'},n=null,l={logIfNoNativeHook:function(o){for(var t=arguments.length,f=new Array(t>1?t-1:0),c=1;c1?f-1:0),v=1;v=F},u=function(){},e.unstable_forceFrameRate=function(n){0>n||125q(c,o))void 0!==b&&0>q(b,c)?(n[l]=b,n[f]=o,l=f):(n[l]=c,n[s]=o,l=s);else{if(!(void 0!==b&&0>q(b,o)))break e;n[l]=b,n[f]=o,l=f}}}return t}return null}function q(n,t){var o=n.sortIndex-t.sortIndex;return 0!==o?o:n.id-t.id}var D=[],R=[],E=1,N=null,B=3,O=!1,U=!1,W=!1;function Y(n){for(var t=L(R);null!==t;){if(null===t.callback)j(R);else{if(!(t.startTime<=n))break;j(R),t.sortIndex=t.expirationTime,A(D,t)}t=L(R)}}function z(o){if(W=!1,Y(o),!U)if(null!==L(D))U=!0,n(G);else{var l=L(R);null!==l&&t(z,l.startTime-o)}}function G(n,u){U=!1,W&&(W=!1,o()),O=!0;var s=B;try{for(Y(u),N=L(D);null!==N&&(!(N.expirationTime>u)||n&&!l());){var c=N.callback;if(null!==c){N.callback=null,B=N.priorityLevel;var f=c(N.expirationTime<=u);u=e.unstable_now(),"function"==typeof f?N.callback=f:N===L(D)&&j(D),Y(u)}else j(D);N=L(D)}if(null!==N)var b=!0;else{var p=L(R);null!==p&&t(z,p.startTime-u),b=!1}return b}finally{N=null,B=s,O=!1}}function H(n){switch(n){case 1:return-1;case 2:return 250;case 5:return 1073741823;case 4:return 1e4;default:return 5e3}}var J=u;e.unstable_ImmediatePriority=1,e.unstable_UserBlockingPriority=2,e.unstable_NormalPriority=3,e.unstable_IdlePriority=5,e.unstable_LowPriority=4,e.unstable_runWithPriority=function(n,t){switch(n){case 1:case 2:case 3:case 4:case 5:break;default:n=3}var o=B;B=n;try{return t()}finally{B=o}},e.unstable_next=function(n){switch(B){case 1:case 2:case 3:var t=3;break;default:t=B}var o=B;B=t;try{return n()}finally{B=o}},e.unstable_scheduleCallback=function(l,u,s){var c=e.unstable_now();if("object"==typeof s&&null!==s){var f=s.delay;f="number"==typeof f&&0c?(l.sortIndex=f,A(R,l),null===L(D)&&l===L(R)&&(W?o():W=!0,t(z,f-c))):(l.sortIndex=s,A(D,l),U||O||(U=!0,n(G))),l},e.unstable_cancelCallback=function(n){n.callback=null},e.unstable_wrapCallback=function(n){var t=B;return function(){var o=B;B=t;try{return n.apply(this,arguments)}finally{B=o}}},e.unstable_getCurrentPriorityLevel=function(){return B},e.unstable_shouldYield=function(){var n=e.unstable_now();Y(n);var t=L(D);return t!==N&&null!==N&&null!==t&&null!==t.callback&&t.startTime<=n&&t.expirationTime0?E._pressDelayTimeout=setTimeout(function(){E._receiveSignal('DELAY',t)},o):E._receiveSignal('DELAY',t);var l=I(n,10,500);E._longPressDelayTimeout=setTimeout(function(){E._handleLongPress(t)},l+o)},onResponderMove:function(t){null!=E._config.onPressMove&&E._config.onPressMove(t);var R=E._responderRegion;if(null!=R){var n=A(t);if(null==n)return E._cancelLongPressDelayTimeout(),void E._receiveSignal('LEAVE_PRESS_RECT',t);if(null!=E._touchActivatePosition){var 
_=E._touchActivatePosition.pageX-n.pageX,o=E._touchActivatePosition.pageY-n.pageY;Math.hypot(_,o)>10&&E._cancelLongPressDelayTimeout()}E._isTouchWithinResponderRegion(n,R)?E._receiveSignal('ENTER_PRESS_RECT',t):(E._cancelLongPressDelayTimeout(),E._receiveSignal('LEAVE_PRESS_RECT',t))}},onResponderRelease:function(t){E._receiveSignal('RESPONDER_RELEASE',t)},onResponderTerminate:function(t){E._receiveSignal('RESPONDER_TERMINATED',t)},onResponderTerminationRequest:function(){var t=E._config.onResponderTerminationRequest;return null==t||t()},onClick:function(t){var R=E._config.onPress;null!=R&&R(t)}},n='ios'===o.default.OS||'android'===o.default.OS?null:{onMouseEnter:function(t){if((0,r(d[10]).isHoverEnabled)()){E._isHovered=!0,E._cancelHoverOutDelayTimeout();var R=E._config,n=R.onHoverIn,_=R.getHoverInDelayMS;if(null!=n){var o=I(_);o>0?E._hoverInDelayTimeout=setTimeout(function(){n(t)},o):n(t)}}},onMouseLeave:function(t){if(E._isHovered){E._isHovered=!1,E._cancelHoverInDelayTimeout();var R=E._config,n=R.onHoverOut,_=R.getHoverOutDelayMS;if(null!=n){var o=I(_);o>0?E._hoverInDelayTimeout=setTimeout(function(){n(t)},o):n(t)}}}};return s(s(s({},t),R),n)}},{key:"_receiveSignal",value:function(E,t){var R,_=this._touchState,o=null==(R=S[_])?void 0:R[E];null==this._responderID&&'RESPONDER_RELEASE'===E||((0,n.default)(null!=o&&'ERROR'!==o,'Pressability: Invalid signal `%s` for state `%s` on responder: %s',E,_,'number'==typeof this._responderID?this._responderID:'<>'),_!==o&&(this._performTransitionSideEffects(_,o,E,t),this._touchState=o))}},{key:"_performTransitionSideEffects",value:function(E,t,R,n){N(R)&&(this._touchActivatePosition=null,this._cancelLongPressDelayTimeout());var l='NOT_RESPONDER'===E&&'RESPONDER_INACTIVE_PRESS_IN'===t,u=!O(E)&&O(t);if((l||u)&&this._measureResponderRegion(),P(E)&&'LONG_PRESS_DETECTED'===R){var s=this._config.onLongPress;null!=s&&s(n)}var S=T(E),D=T(t);if(!S&&D?this._activate(n):S&&!D&&this._deactivate(n),P(E)&&'RESPONDER_RELEASE'===R){var c=this._config,h=c.onLongPress,v=c.onPress,f=c.getTouchSoundDisabled;if(null!=v)if(!(null!=h&&'RESPONDER_ACTIVE_LONG_PRESS_IN'===E&&this._shouldLongPressCancelPress())){var I;D||S||(this._activate(n),this._deactivate(n));var A=null!=(I=null==f?null:f())&&I;'android'!==o.default.OS||A||_.default.playTouchSound(),v(n)}}this._cancelPressDelayTimeout()}},{key:"_activate",value:function(E){var t=this._config.onPressIn,R=A(E);this._touchActivatePosition={pageX:R.pageX,pageY:R.pageY},null!=t&&t(E)}},{key:"_deactivate",value:function(E){var t=this._config,R=t.onPressOut,n=t.getPressOutDelayMS;if(null!=R){var _=I(n);_>0?this._pressOutDelayTimeout=setTimeout(function(){R(E)},_):R(E)}}},{key:"_measureResponderRegion",value:function(){null!=this._responderID&&('number'==typeof this._responderID?l.default.measure(this._responderID,this._measureCallback):this._responderID.measure(this._measureCallback))}},{key:"_isTouchWithinResponderRegion",value:function(E,t){var R=this._config,n=R.getHitSlop,_=R.getPressRectOffset,o=t.bottom,l=t.left,u=t.right,s=t.top,S=null==n?null:n();null!=S&&(null!=S.bottom&&(o+=S.bottom),null!=S.left&&(l-=S.left),null!=S.right&&(u+=S.right),null!=S.top&&(s-=S.top));var T=null==_?null:_();return o+=null==T||null==T.bottom?D:T.bottom,l-=null==T||null==T.left?c:T.left,u+=null==T||null==T.right?h:T.right,s-=null==T||null==T.top?v:T.top,E.pageX>l&&E.pageXs&&E.pageY1&&void 0!==arguments[1]?arguments[1]:0,n=arguments.length>2&&void 0!==arguments[2]?arguments[2]:0;return 
Math.max(R,null!=(t=null==E?null:E())?t:n)},A=function(E){var t=E.nativeEvent,R=t.changedTouches,n=t.touches;return null!=n&&n.length>0?n[0]:null!=R&&R.length>0?R[0]:E.nativeEvent}},194,[1,19,5,6,25,195,82,87,13,14,197]); -__d(function(g,r,i,a,m,e,d){'use strict';var u=r(d[0])(r(d[1])),o={playTouchSound:function(){u.default&&u.default.playTouchSound()}};m.exports=o},195,[1,196]); -__d(function(g,r,i,a,m,e,d){'use strict';Object.defineProperty(e,"__esModule",{value:!0}),e.default=void 0;var t=r(d[0])(r(d[1])).get('SoundManager');e.default=t},196,[13,24]); -__d(function(g,r,i,a,m,e,d){'use strict';Object.defineProperty(e,"__esModule",{value:!0}),e.isHoverEnabled=function(){return n};var n=!1;if('web'===r(d[0])(r(d[1])).default.OS&&Boolean('undefined'!=typeof window&&window.document&&window.document.createElement)){var t=0,o=function(){t=Date.now(),n&&(n=!1)};document.addEventListener('touchstart',o,!0),document.addEventListener('touchmove',o,!0),document.addEventListener('mousemove',function(){n||Date.now()-t<1e3||(n=!0)},!0)}},197,[1,82]); -__d(function(g,r,i,a,m,e,d){'use strict';Object.defineProperty(e,"__esModule",{value:!0}),e.default=void 0;var t=r(d[0])(r(d[1])),n=r(d[0])(r(d[2])),u=r(d[0])(r(d[3])),l=r(d[0])(r(d[4])),s=r(d[0])(r(d[5])),f=r(d[0])(r(d[6])),o=(function(){function o(n,v){(0,t.default)(this,o),(0,u.default)(s.default.isTV,'TVTouchable: Requires `Platform.isTV`.'),this._tvEventHandler=new f.default,this._tvEventHandler.enable(n,function(t,u){u.dispatchConfig={},l.default.findNodeHandle(n)===u.tag&&('focus'===u.eventType?v.onFocus(u):'blur'===u.eventType?v.onBlur(u):'select'===u.eventType&&(v.getDisabled()||v.onPress(u)))})}return(0,n.default)(o,[{key:"destroy",value:function(){this._tvEventHandler.disable()}}]),o})();e.default=o},198,[1,5,6,25,131,82,199]); -__d(function(g,r,i,a,m,e,d){'use strict';var t=r(d[0])(r(d[1])),n=r(d[0])(r(d[2])),v=r(d[0])(r(d[3])),_=(function(){function _(){(0,t.default)(this,_),this.__nativeTVNavigationEventListener=null,this.__nativeTVNavigationEventEmitter=null}return(0,n.default)(_,[{key:"enable",value:function(t,n){v.default&&(this.__nativeTVNavigationEventEmitter=new(r(d[4]))(v.default),this.__nativeTVNavigationEventListener=this.__nativeTVNavigationEventEmitter.addListener('onHWKeyEvent',function(v){n&&n(t,v)}))}},{key:"disable",value:function(){this.__nativeTVNavigationEventListener&&(this.__nativeTVNavigationEventListener.remove(),delete this.__nativeTVNavigationEventListener),this.__nativeTVNavigationEventEmitter&&delete this.__nativeTVNavigationEventEmitter}}]),_})();m.exports=_},199,[1,5,6,200,160]); -__d(function(g,r,i,a,m,e,d){'use strict';Object.defineProperty(e,"__esModule",{value:!0}),e.default=void 0;var t=r(d[0])(r(d[1])).get('TVNavigationEventEmitter');e.default=t},200,[13,24]); -__d(function(g,r,i,a,m,e,d){'use strict';var t=r(d[0])(r(d[1]));function n(t,n){var c=Object.keys(t);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(t);n&&(o=o.filter(function(n){return Object.getOwnPropertyDescriptor(t,n).enumerable})),c.push.apply(c,o)}return c}var c=r(d[0])(r(d[2])).default.isTesting?r(d[3]):r(d[4]);m.exports=(function(c){for(var o=1;o1&&void 0!==arguments[1]?arguments[1]:{}).iterations;return o},event:function(t,n){return null},createAnimatedComponent:r(d[6]),attachNativeEvent:r(d[7]).attachNativeEvent,forkEvent:r(d[5]).forkEvent,unforkEvent:r(d[5]).unforkEvent,Event:r(d[7]).AnimatedEvent,__PropsOnlyForTests:r(d[8])}},202,[19,203,213,206,208,214,230,229,232]); -__d(function(g,r,i,a,m,e,d){'use strict';function 
t(){if("undefined"==typeof Reflect||!Reflect.construct)return!1;if(Reflect.construct.sham)return!1;if("function"==typeof Proxy)return!0;try{return Date.prototype.toString.call(Reflect.construct(Date,[],function(){})),!0}catch(t){return!1}}function n(t){var n=new Set;!(function t(s){'function'==typeof s.update?n.add(s):s.__getChildren().forEach(t)})(t),n.forEach(function(t){return t.update()})}var s=(function(s){r(d[2])(l,s);var u,o,_=(u=l,o=t(),function(){var t,n=r(d[0])(u);if(o){var s=r(d[0])(this).constructor;t=Reflect.construct(n,arguments,s)}else t=n.apply(this,arguments);return r(d[1])(this,t)});function l(t){var n;return r(d[3])(this,l),(n=_.call(this))._startingValue=n._value=t,n._offset=0,n._animation=null,n}return r(d[4])(l,[{key:"__detach",value:function(){this.stopAnimation(),r(d[5])(r(d[0])(l.prototype),"__detach",this).call(this)}},{key:"__getValue",value:function(){return this._value+this._offset}},{key:"setValue",value:function(t){this._animation&&(this._animation.stop(),this._animation=null),this._updateValue(t,!this.__isNative),this.__isNative&&r(d[6]).API.setAnimatedNodeValue(this.__getNativeTag(),t)}},{key:"setOffset",value:function(t){this._offset=t,this.__isNative&&r(d[6]).API.setAnimatedNodeOffset(this.__getNativeTag(),t)}},{key:"flattenOffset",value:function(){this._value+=this._offset,this._offset=0,this.__isNative&&r(d[6]).API.flattenAnimatedNodeOffset(this.__getNativeTag())}},{key:"extractOffset",value:function(){this._offset+=this._value,this._value=0,this.__isNative&&r(d[6]).API.extractAnimatedNodeOffset(this.__getNativeTag())}},{key:"stopAnimation",value:function(t){this.stopTracking(),this._animation&&this._animation.stop(),this._animation=null,t&&t(this.__getValue())}},{key:"resetAnimation",value:function(t){this.stopAnimation(t),this._value=this._startingValue}},{key:"_onAnimatedValueUpdateReceived",value:function(t){this._updateValue(t,!1)}},{key:"interpolate",value:function(t){return new(r(d[7]))(this,t)}},{key:"animate",value:function(t,n){var s=this,u=null;t.__isInteraction&&(u=r(d[8]).createInteractionHandle());var o=this._animation;this._animation&&this._animation.stop(),this._animation=t,t.start(this._value,function(t){s._updateValue(t,!0)},function(t){s._animation=null,null!==u&&r(d[8]).clearInteractionHandle(u),n&&n(t)},o,this)}},{key:"stopTracking",value:function(){this._tracking&&this._tracking.__detach(),this._tracking=null}},{key:"track",value:function(t){this.stopTracking(),this._tracking=t}},{key:"_updateValue",value:function(t,s){this._value=t,s&&n(this),r(d[5])(r(d[0])(l.prototype),"__callListeners",this).call(this,this.__getValue())}},{key:"__getNativeConfig",value:function(){return{type:'value',value:this._value,offset:this._offset}}}]),l})(r(d[9]));m.exports=s},203,[12,9,7,5,6,50,204,206,209,207]); -__d(function(g,r,i,a,m,e,d){'use strict';var t,n=r(d[0])(r(d[1])),o=r(d[0])(r(d[2])),l=r(d[0])(r(d[3])),u=1,s=1,f=!1,v=[],c={enableQueue:function(){f=!0},disableQueue:function(){(0,l.default)(o.default,'Native animated module is not available'),f=!1;for(var t=0,n=v.length;to){if('identity'===l)return h;'clamp'===l&&(h=o)}return u===c?u:n===o?t<=n?u:c:(n===-1/0?h=-h:o===1/0?h-=n:h=(h-n)/(o-n),h=p(h),u===-1/0?h=-h:c===1/0?h+=u:h=h*(c-u)+u,h)}function f(t){var n=r(d[4])(t);return null===n?t:"rgba("+((4278190080&(n=n||0))>>>24)+", "+((16711680&n)>>>16)+", "+((65280&n)>>>8)+", "+(255&n)/255+")"}var l=/[+-]?(?:\d+\.?\d*|\.\d+)(?:[eE][+-]?\d+)?/g;function h(t){var n=t.outputRange;r(d[3])(n.length>=2,'Bad output range'),s(n=n.map(f));var 
u=n[0].match(l).map(function(){return[]});n.forEach(function(t){t.match(l).forEach(function(t,n){u[n].push(+t)})});var p,h=n[0].match(l).map(function(n,p){return c(o(o({},t),{},{outputRange:u[p]}))}),v='string'==typeof(p=n[0])&&p.startsWith('rgb');return function(t){var o=0;return n[0].replace(l,function(){var n=+h[o++](t);return v&&(n=o<4?Math.round(n):Math.round(1e3*n)/1e3),String(n)})}}function s(t){for(var n=t[0].replace(l,''),o=1;o=t);++o);return o-1}function _(t){r(d[3])(t.length>=2,'inputRange must have at least 2 elements');for(var n=1;n=t[n-1],'inputRange must be monotonically non-decreasing '+t)}function y(t,n){r(d[3])(n.length>=2,t+' must have at least 2 elements'),r(d[3])(2!==n.length||n[0]!==-1/0||n[1]!==1/0,t+'cannot be ]-infinity;+infinity[ '+n)}var R=(function(n){r(d[5])(f,n);var o,u,p=(o=f,u=t(),function(){var t,n=r(d[0])(o);if(u){var c=r(d[0])(this).constructor;t=Reflect.construct(n,arguments,c)}else t=n.apply(this,arguments);return r(d[1])(this,t)});function f(t,n){var o;return r(d[6])(this,f),(o=p.call(this))._parent=t,o._config=n,o._interpolation=c(n),o}return r(d[7])(f,[{key:"__makeNative",value:function(){this._parent.__makeNative(),r(d[8])(r(d[0])(f.prototype),"__makeNative",this).call(this)}},{key:"__getValue",value:function(){var t=this._parent.__getValue();return r(d[3])('number'==typeof t,'Cannot interpolate an input which is not a number.'),this._interpolation(t)}},{key:"interpolate",value:function(t){return new f(this,t)}},{key:"__attach",value:function(){this._parent.__addChild(this)}},{key:"__detach",value:function(){this._parent.__removeChild(this),r(d[8])(r(d[0])(f.prototype),"__detach",this).call(this)}},{key:"__transformDataType",value:function(t){return t.map(r(d[9]).transformDataType)}},{key:"__getNativeConfig",value:function(){return{inputRange:this._config.inputRange,outputRange:this.__transformDataType(this._config.outputRange),extrapolateLeft:this._config.extrapolateLeft||this._config.extrapolate||'extend',extrapolateRight:this._config.extrapolateRight||this._config.extrapolate||'extend',type:'interpolation'}}}]),f})(r(d[10]));R.__createInterpolation=c,m.exports=R},206,[12,9,19,25,106,7,5,6,50,204,207]); -__d(function(g,r,i,a,m,e,d){'use strict';function t(t,o){var c;if("undefined"==typeof Symbol||null==t[Symbol.iterator]){if(Array.isArray(t)||(c=n(t))||o&&t&&"number"==typeof t.length){c&&(t=c);var s=0;return function(){return s>=t.length?{done:!0}:{done:!1,value:t[s++]}}}throw new TypeError("Invalid attempt to iterate non-iterable instance.\nIn order to be iterable, non-array objects must have a [Symbol.iterator]() method.")}return(c=t[Symbol.iterator]()).next.bind(c)}function n(t,n){if(t){if("string"==typeof t)return o(t,n);var c=Object.prototype.toString.call(t).slice(8,-1);return"Object"===c&&t.constructor&&(c=t.constructor.name),"Map"===c||"Set"===c?Array.from(t):"Arguments"===c||/^(?:Ui|I)nt(?:8|16|32)(?:Clamped)?Array$/.test(c)?o(t,n):void 0}}function o(t,n){(null==n||n>t.length)&&(n=t.length);for(var o=0,c=new Array(n);o0?setTimeout(h,0):setImmediate(h))}function h(){l=0;var f=o.size;c.forEach(function(n){return o.add(n)}),s.forEach(function(n){return o.delete(n)});var h=o.size;if(0!==f&&0===h?n.emit(t.Events.interactionComplete):0===f&&0!==h&&n.emit(t.Events.interactionStart),0===h)for(;u.hasTasksToProcess();)if(u.processNext(),p>0&&r(d[4]).getEventLoopRunningTime()>=p){v();break}c.clear(),s.clear()}m.exports=t},209,[52,210,25,212,33]); -__d(function(g,r,i,a,m,e,d){'use strict';m.exports=function(n){var t,o={};for(t in n instanceof 
Object&&!Array.isArray(n)||r(d[0])(!1),n)n.hasOwnProperty(t)&&(o[t]=t);return o}},210,[211]); -__d(function(g,r,i,a,m,e,d){'use strict';var n=function(n){if(void 0===n)throw new Error('invariant(...): Second argument must be a string.')};m.exports=function(o,t){for(var f=arguments.length,s=new Array(f>2?f-2:0),u=2;u0||0===n})}},{key:"hasTasksToProcess",value:function(){return this._getCurrentQueue().length>0}},{key:"processNext",value:function(){var t=this._getCurrentQueue();if(t.length){var n=t.shift();try{n.gen?this._genPromise(n):n.run?n.run():(r(d[3])('function'==typeof n,'Expected Function, SimpleTask, or PromiseTask, but got:\n'+JSON.stringify(n,null,2)),n())}catch(t){throw t.message='TaskQueue: Error with task '+(n.name||'')+': '+t.message,t}}}},{key:"_getCurrentQueue",value:function(){var t=this._queueStack.length-1,n=this._queueStack[t];return n.popable&&0===n.tasks.length&&this._queueStack.length>1?(this._queueStack.pop(),this._getCurrentQueue()):n.tasks}},{key:"_genPromise",value:function(t){var n=this;this._queueStack.push({tasks:[],popable:!1});var u=this._queueStack.length-1;t.gen().then(function(){n._queueStack[u].popable=!0,n.hasTasksToProcess()&&n._onMoreTasks()}).catch(function(n){throw n.message="TaskQueue: Error resolving Promise in task "+t.name+": "+n.message,n}).done()}}]),t})();m.exports=u},212,[19,5,6,25]); -__d(function(g,r,i,a,m,e,d){'use strict';function t(){if("undefined"==typeof Reflect||!Reflect.construct)return!1;if(Reflect.construct.sham)return!1;if("function"==typeof Proxy)return!0;try{return Date.prototype.toString.call(Reflect.construct(Date,[],function(){})),!0}catch(t){return!1}}var n=1,s=(function(s){r(d[2])(l,s);var u,f,o=(u=l,f=t(),function(){var t,n=r(d[0])(u);if(f){var s=r(d[0])(this).constructor;t=Reflect.construct(n,arguments,s)}else t=n.apply(this,arguments);return r(d[1])(this,t)});function l(t){var n;r(d[3])(this,l),n=o.call(this);var s=t||{x:0,y:0};return'number'==typeof s.x&&'number'==typeof s.y?(n.x=new(r(d[4]))(s.x),n.y=new(r(d[4]))(s.y)):(r(d[5])(s.x instanceof r(d[4])&&s.y instanceof r(d[4]),"AnimatedValueXY must be initialized with an object of numbers or AnimatedValues."),n.x=s.x,n.y=s.y),n._listeners={},n}return r(d[6])(l,[{key:"setValue",value:function(t){this.x.setValue(t.x),this.y.setValue(t.y)}},{key:"setOffset",value:function(t){this.x.setOffset(t.x),this.y.setOffset(t.y)}},{key:"flattenOffset",value:function(){this.x.flattenOffset(),this.y.flattenOffset()}},{key:"extractOffset",value:function(){this.x.extractOffset(),this.y.extractOffset()}},{key:"__getValue",value:function(){return{x:this.x.__getValue(),y:this.y.__getValue()}}},{key:"resetAnimation",value:function(t){this.x.resetAnimation(),this.y.resetAnimation(),t&&t(this.__getValue())}},{key:"stopAnimation",value:function(t){this.x.stopAnimation(),this.y.stopAnimation(),t&&t(this.__getValue())}},{key:"addListener",value:function(t){var s=this,u=String(n++),f=function(n){n.value;t(s.__getValue())};return this._listeners[u]={x:this.x.addListener(f),y:this.y.addListener(f)},u}},{key:"removeListener",value:function(t){this.x.removeListener(this._listeners[t].x),this.y.removeListener(this._listeners[t].y),delete this._listeners[t]}},{key:"removeAllListeners",value:function(){this.x.removeAllListeners(),this.y.removeAllListeners(),this._listeners={}}},{key:"getLayout",value:function(){return{left:this.x,top:this.y}}},{key:"getTranslateTransform",value:function(){return[{translateX:this.x},{translateY:this.y}]}}]),l})(r(d[7]));m.exports=s},213,[12,9,7,5,203,25,6,207]); 
-__d(function(g,r,i,a,m,e,d){'use strict';function t(t,n){var o=Object.keys(t);if(Object.getOwnPropertySymbols){var u=Object.getOwnPropertySymbols(t);n&&(u=u.filter(function(n){return Object.getOwnPropertyDescriptor(t,n).enumerable})),o.push.apply(o,u)}return o}function n(n){for(var o=1;o1&&void 0!==arguments[1]?arguments[1]:{},o=n.iterations,u=void 0===o?-1:o,s=n.resetBeforeIteration,c=void 0===s||s,f=!1,v=0;return{start:function(n){t&&0!==u?t._isUsingNativeDriver()?t._startNativeLoop(u):(function o(){var s=arguments.length>0&&void 0!==arguments[0]?arguments[0]:{finished:!0};f||v===u||!1===s.finished?n&&n(s):(v++,c&&t.reset(),t.start(o))})():n&&n({finished:!0})},stop:function(){f=!0,t.stop()},reset:function(){v=0,f=!1,t.reset()},_startNativeLoop:function(){throw new Error('Loops run using the native driver cannot contain Animated.loop animations')},_isUsingNativeDriver:function(){return t._isUsingNativeDriver()}}},event:function(t,n){var o=new(r(d[14]).AnimatedEvent)(t,n);return o.__isNative?o:o.__getHandler()},createAnimatedComponent:r(d[16]),attachNativeEvent:r(d[14]).attachNativeEvent,forkEvent:function(t,n){return t?t instanceof r(d[14]).AnimatedEvent?(t.__addListener(n),t):function(){'function'==typeof t&&t.apply(void 0,arguments),n.apply(void 0,arguments)}:n},unforkEvent:function(t,n){t&&t instanceof r(d[14]).AnimatedEvent&&t.__removeListener(n)},Event:r(d[14]).AnimatedEvent,__PropsOnlyForTests:r(d[17])}},214,[19,215,216,217,218,219,220,213,208,221,222,225,228,203,229,206,230,232]); -__d(function(g,r,i,a,m,e,d){'use strict';function t(){if("undefined"==typeof Reflect||!Reflect.construct)return!1;if(Reflect.construct.sham)return!1;if("function"==typeof Proxy)return!0;try{return Date.prototype.toString.call(Reflect.construct(Date,[],function(){})),!0}catch(t){return!1}}var n=(function(n){r(d[2])(o,n);var _,u,c=(_=o,u=t(),function(){var t,n=r(d[0])(_);if(u){var c=r(d[0])(this).constructor;t=Reflect.construct(n,arguments,c)}else t=n.apply(this,arguments);return r(d[1])(this,t)});function o(t,n){var _;return r(d[3])(this,o),(_=c.call(this))._a='number'==typeof t?new(r(d[4]))(t):t,_._b='number'==typeof n?new(r(d[4]))(n):n,_}return r(d[5])(o,[{key:"__makeNative",value:function(){this._a.__makeNative(),this._b.__makeNative(),r(d[6])(r(d[0])(o.prototype),"__makeNative",this).call(this)}},{key:"__getValue",value:function(){return this._a.__getValue()+this._b.__getValue()}},{key:"interpolate",value:function(t){return new(r(d[7]))(this,t)}},{key:"__attach",value:function(){this._a.__addChild(this),this._b.__addChild(this)}},{key:"__detach",value:function(){this._a.__removeChild(this),this._b.__removeChild(this),r(d[6])(r(d[0])(o.prototype),"__detach",this).call(this)}},{key:"__getNativeConfig",value:function(){return{type:'addition',input:[this._a.__getNativeTag(),this._b.__getNativeTag()]}}}]),o})(r(d[8]));m.exports=n},215,[12,9,7,5,203,6,50,206,207]); -__d(function(g,r,i,a,m,e,d){'use strict';function t(){if("undefined"==typeof Reflect||!Reflect.construct)return!1;if(Reflect.construct.sham)return!1;if("function"==typeof Proxy)return!0;try{return Date.prototype.toString.call(Reflect.construct(Date,[],function(){})),!0}catch(t){return!1}}var n=(function(n){r(d[2])(o,n);var _,u,c=(_=o,u=t(),function(){var t,n=r(d[0])(_);if(u){var c=r(d[0])(this).constructor;t=Reflect.construct(n,arguments,c)}else t=n.apply(this,arguments);return r(d[1])(this,t)});function o(t,n){var _;return r(d[3])(this,o),(_=c.call(this))._a='number'==typeof t?new(r(d[4]))(t):t,_._b='number'==typeof 
n?new(r(d[4]))(n):n,_}return r(d[5])(o,[{key:"__makeNative",value:function(){this._a.__makeNative(),this._b.__makeNative(),r(d[6])(r(d[0])(o.prototype),"__makeNative",this).call(this)}},{key:"__getValue",value:function(){return this._a.__getValue()-this._b.__getValue()}},{key:"interpolate",value:function(t){return new(r(d[7]))(this,t)}},{key:"__attach",value:function(){this._a.__addChild(this),this._b.__addChild(this)}},{key:"__detach",value:function(){this._a.__removeChild(this),this._b.__removeChild(this),r(d[6])(r(d[0])(o.prototype),"__detach",this).call(this)}},{key:"__getNativeConfig",value:function(){return{type:'subtraction',input:[this._a.__getNativeTag(),this._b.__getNativeTag()]}}}]),o})(r(d[8]));m.exports=n},216,[12,9,7,5,203,6,50,206,207]); -__d(function(g,r,i,a,m,e,d){'use strict';function t(){if("undefined"==typeof Reflect||!Reflect.construct)return!1;if(Reflect.construct.sham)return!1;if("function"==typeof Proxy)return!0;try{return Date.prototype.toString.call(Reflect.construct(Date,[],function(){})),!0}catch(t){return!1}}var n=(function(n){r(d[2])(c,n);var _,o,u=(_=c,o=t(),function(){var t,n=r(d[0])(_);if(o){var u=r(d[0])(this).constructor;t=Reflect.construct(n,arguments,u)}else t=n.apply(this,arguments);return r(d[1])(this,t)});function c(t,n){var _;return r(d[3])(this,c),(_=u.call(this))._a='number'==typeof t?new(r(d[4]))(t):t,_._b='number'==typeof n?new(r(d[4]))(n):n,_}return r(d[5])(c,[{key:"__makeNative",value:function(){this._a.__makeNative(),this._b.__makeNative(),r(d[6])(r(d[0])(c.prototype),"__makeNative",this).call(this)}},{key:"__getValue",value:function(){var t=this._a.__getValue(),n=this._b.__getValue();return 0===n&&console.error('Detected division by zero in AnimatedDivision'),t/n}},{key:"interpolate",value:function(t){return new(r(d[7]))(this,t)}},{key:"__attach",value:function(){this._a.__addChild(this),this._b.__addChild(this)}},{key:"__detach",value:function(){this._a.__removeChild(this),this._b.__removeChild(this),r(d[6])(r(d[0])(c.prototype),"__detach",this).call(this)}},{key:"__getNativeConfig",value:function(){return{type:'division',input:[this._a.__getNativeTag(),this._b.__getNativeTag()]}}}]),c})(r(d[8]));m.exports=n},217,[12,9,7,5,203,6,50,206,207]); -__d(function(g,r,i,a,m,e,d){'use strict';function t(){if("undefined"==typeof Reflect||!Reflect.construct)return!1;if(Reflect.construct.sham)return!1;if("function"==typeof Proxy)return!0;try{return Date.prototype.toString.call(Reflect.construct(Date,[],function(){})),!0}catch(t){return!1}}var n=(function(n){r(d[2])(o,n);var _,u,c=(_=o,u=t(),function(){var t,n=r(d[0])(_);if(u){var c=r(d[0])(this).constructor;t=Reflect.construct(n,arguments,c)}else t=n.apply(this,arguments);return r(d[1])(this,t)});function o(t,n){var _;return r(d[3])(this,o),(_=c.call(this))._a='number'==typeof t?new(r(d[4]))(t):t,_._b='number'==typeof n?new(r(d[4]))(n):n,_}return r(d[5])(o,[{key:"__makeNative",value:function(){this._a.__makeNative(),this._b.__makeNative(),r(d[6])(r(d[0])(o.prototype),"__makeNative",this).call(this)}},{key:"__getValue",value:function(){return this._a.__getValue()*this._b.__getValue()}},{key:"interpolate",value:function(t){return 
new(r(d[7]))(this,t)}},{key:"__attach",value:function(){this._a.__addChild(this),this._b.__addChild(this)}},{key:"__detach",value:function(){this._a.__removeChild(this),this._b.__removeChild(this),r(d[6])(r(d[0])(o.prototype),"__detach",this).call(this)}},{key:"__getNativeConfig",value:function(){return{type:'multiplication',input:[this._a.__getNativeTag(),this._b.__getNativeTag()]}}}]),o})(r(d[8]));m.exports=n},218,[12,9,7,5,203,6,50,206,207]); -__d(function(g,r,i,a,m,e,d){'use strict';function t(){if("undefined"==typeof Reflect||!Reflect.construct)return!1;if(Reflect.construct.sham)return!1;if("function"==typeof Proxy)return!0;try{return Date.prototype.toString.call(Reflect.construct(Date,[],function(){})),!0}catch(t){return!1}}var u=(function(u){r(d[2])(s,u);var n,o,c=(n=s,o=t(),function(){var t,u=r(d[0])(n);if(o){var c=r(d[0])(this).constructor;t=Reflect.construct(u,arguments,c)}else t=u.apply(this,arguments);return r(d[1])(this,t)});function s(t,u){var n;return r(d[3])(this,s),(n=c.call(this))._a=t,n._modulus=u,n}return r(d[4])(s,[{key:"__makeNative",value:function(){this._a.__makeNative(),r(d[5])(r(d[0])(s.prototype),"__makeNative",this).call(this)}},{key:"__getValue",value:function(){return(this._a.__getValue()%this._modulus+this._modulus)%this._modulus}},{key:"interpolate",value:function(t){return new(r(d[6]))(this,t)}},{key:"__attach",value:function(){this._a.__addChild(this)}},{key:"__detach",value:function(){this._a.__removeChild(this),r(d[5])(r(d[0])(s.prototype),"__detach",this).call(this)}},{key:"__getNativeConfig",value:function(){return{type:'modulus',input:this._a.__getNativeTag(),modulus:this._modulus}}}]),s})(r(d[7]));m.exports=u},219,[12,9,7,5,6,50,206,207]); -__d(function(g,r,i,a,m,e,d){'use strict';function t(){if("undefined"==typeof Reflect||!Reflect.construct)return!1;if(Reflect.construct.sham)return!1;if("function"==typeof Proxy)return!0;try{return Date.prototype.toString.call(Reflect.construct(Date,[],function(){})),!0}catch(t){return!1}}var n=(function(n){r(d[2])(s,n);var u,_,c=(u=s,_=t(),function(){var t,n=r(d[0])(u);if(_){var c=r(d[0])(this).constructor;t=Reflect.construct(n,arguments,c)}else t=n.apply(this,arguments);return r(d[1])(this,t)});function s(t,n,u){var _;return r(d[3])(this,s),(_=c.call(this))._a=t,_._min=n,_._max=u,_._value=_._lastValue=_._a.__getValue(),_}return r(d[4])(s,[{key:"__makeNative",value:function(){this._a.__makeNative(),r(d[5])(r(d[0])(s.prototype),"__makeNative",this).call(this)}},{key:"interpolate",value:function(t){return new(r(d[6]))(this,t)}},{key:"__getValue",value:function(){var t=this._a.__getValue(),n=t-this._lastValue;return this._lastValue=t,this._value=Math.min(Math.max(this._value+n,this._min),this._max),this._value}},{key:"__attach",value:function(){this._a.__addChild(this)}},{key:"__detach",value:function(){this._a.__removeChild(this),r(d[5])(r(d[0])(s.prototype),"__detach",this).call(this)}},{key:"__getNativeConfig",value:function(){return{type:'diffclamp',input:this._a.__getNativeTag(),min:this._min,max:this._max}}}]),s})(r(d[7]));m.exports=n},220,[12,9,7,5,6,50,206,207]); -__d(function(g,r,i,a,m,e,d){'use strict';function t(t,n){var o=Object.keys(t);if(Object.getOwnPropertySymbols){var c=Object.getOwnPropertySymbols(t);n&&(c=c.filter(function(n){return Object.getOwnPropertyDescriptor(t,n).enumerable})),o.push.apply(o,c)}return o}function n(n){for(var o=1;o0,'Stiffness value must be greater than 0'),r(d[5])(p._damping>0,'Damping value must be greater than 0'),r(d[5])(p._mass>0,'Mass value must be greater than 
0'),p}return r(d[7])(h,[{key:"__getNativeAnimationConfig",value:function(){var t;return{type:'spring',overshootClamping:this._overshootClamping,restDisplacementThreshold:this._restDisplacementThreshold,restSpeedThreshold:this._restSpeedThreshold,stiffness:this._stiffness,damping:this._damping,mass:this._mass,initialVelocity:null!=(t=this._initialVelocity)?t:this._lastVelocity,toValue:this._toValue,iterations:this.__iterations}}},{key:"start",value:function(t,s,n,o,l){var _=this;if(this.__active=!0,this._startPosition=t,this._lastPosition=this._startPosition,this._onUpdate=s,this.__onEnd=n,this._lastTime=Date.now(),this._frameTime=0,o instanceof h){var u=o.getInternalState();this._lastPosition=u.lastPosition,this._lastVelocity=u.lastVelocity,this._initialVelocity=this._lastVelocity,this._lastTime=u.lastTime}var f=function(){_._useNativeDriver?_.__startNativeAnimation(l):_.onUpdate()};this._delay?this._timeout=setTimeout(f,this._delay):f()}},{key:"getInternalState",value:function(){return{lastPosition:this._lastPosition,lastVelocity:this._lastVelocity,lastTime:this._lastTime}}},{key:"onUpdate",value:function(){var t=Date.now();t>this._lastTime+64&&(t=this._lastTime+64);var s=(t-this._lastTime)/1e3;this._frameTime+=s;var n=this._damping,o=this._mass,l=this._stiffness,h=-this._initialVelocity,_=n/(2*Math.sqrt(l*o)),u=Math.sqrt(l/o),f=u*Math.sqrt(1-_*_),c=this._toValue-this._startPosition,v=0,p=0,y=this._frameTime;if(_<1){var V=Math.exp(-_*u*y);v=this._toValue-V*((h+_*u*c)/f*Math.sin(f*y)+c*Math.cos(f*y)),p=_*u*V*(Math.sin(f*y)*(h+_*u*c)/f+c*Math.cos(f*y))-V*(Math.cos(f*y)*(h+_*u*c)-f*c*Math.sin(f*y))}else{var T=Math.exp(-u*y);v=this._toValue-T*(c+(h+u*c)*y),p=T*(h*(y*u-1)+y*c*(u*u))}if(this._lastTime=t,this._lastPosition=v,this._lastVelocity=p,this._onUpdate(v),this.__active){var b=!1;this._overshootClamping&&0!==this._stiffness&&(b=this._startPositionthis._toValue:v18&&A<=44?p(A):h(A),s(2*M-M*M,v,.01));return{stiffness:n(x),damping:t(B)}}}},223,[]); -__d(function(g,r,i,a,m,e,d){'use strict';var n=(function(){function n(){r(d[0])(this,n)}return r(d[1])(n,[{key:"start",value:function(n,t,o,_,u){}},{key:"stop",value:function(){this.__nativeId&&r(d[2]).API.stopAnimation(this.__nativeId)}},{key:"__getNativeAnimationConfig",value:function(){throw new Error('This animation type cannot be offloaded to native')}},{key:"__debouncedOnEnd",value:function(n){var t=this.__onEnd;this.__onEnd=null,t&&t(n)}},{key:"__startNativeAnimation",value:function(n){r(d[2]).API.enableQueue(),n.__makeNative(),r(d[2]).API.disableQueue(),this.__nativeId=r(d[2]).generateNewAnimationId(),r(d[2]).API.startAnimatingNode(this.__nativeId,n.__getNativeTag(),this.__getNativeAnimationConfig(),this.__debouncedOnEnd.bind(this))}}]),n})();m.exports=n},224,[5,6,204]); -__d(function(g,r,i,a,m,e,d){'use strict';function t(){if("undefined"==typeof Reflect||!Reflect.construct)return!1;if(Reflect.construct.sham)return!1;if("function"==typeof Proxy)return!0;try{return Date.prototype.toString.call(Reflect.construct(Date,[],function(){})),!0}catch(t){return!1}}var n;function s(){if(!n){var t=r(d[2]);n=t.inOut(t.ease)}return n}var o=(function(n){r(d[3])(h,n);var o,u,_=(o=h,u=t(),function(){var t,n=r(d[0])(o);if(u){var s=r(d[0])(this).constructor;t=Reflect.construct(n,arguments,s)}else t=n.apply(this,arguments);return r(d[1])(this,t)});function h(t){var n,o,u,l,c,f;return 
r(d[4])(this,h),(f=_.call(this))._toValue=t.toValue,f._easing=null!=(n=t.easing)?n:s(),f._duration=null!=(o=t.duration)?o:500,f._delay=null!=(u=t.delay)?u:0,f.__iterations=null!=(l=t.iterations)?l:1,f._useNativeDriver=r(d[5]).shouldUseNativeDriver(t),f.__isInteraction=null!=(c=t.isInteraction)?c:!f._useNativeDriver,f}return r(d[6])(h,[{key:"__getNativeAnimationConfig",value:function(){for(var t=[],n=Math.round(this._duration/16.666666666666668),s=0;s=this._startTime+this._duration)return 0===this._duration?this._onUpdate(this._toValue):this._onUpdate(this._fromValue+this._easing(1)*(this._toValue-this._fromValue)),void this.__debouncedOnEnd({finished:!0});this._onUpdate(this._fromValue+this._easing((t-this._startTime)/this._duration)*(this._toValue-this._fromValue)),this.__active&&(this._animationFrame=requestAnimationFrame(this.onUpdate.bind(this)))}},{key:"stop",value:function(){r(d[7])(r(d[0])(h.prototype),"stop",this).call(this),this.__active=!1,clearTimeout(this._timeout),g.cancelAnimationFrame(this._animationFrame),this.__debouncedOnEnd({finished:!1})}}]),h})(r(d[8]));m.exports=o},225,[12,9,226,7,5,204,6,50,224]); -__d(function(g,r,i,a,m,e,d){'use strict';var n,u=(function(){function u(){r(d[0])(this,u)}return r(d[1])(u,null,[{key:"step0",value:function(n){return n>0?1:0}},{key:"step1",value:function(n){return n>=1?1:0}},{key:"linear",value:function(n){return n}},{key:"ease",value:function(t){return n||(n=u.bezier(.42,0,1,1)),n(t)}},{key:"quad",value:function(n){return n*n}},{key:"cubic",value:function(n){return n*n*n}},{key:"poly",value:function(n){return function(u){return Math.pow(u,n)}}},{key:"sin",value:function(n){return 1-Math.cos(n*Math.PI/2)}},{key:"circle",value:function(n){return 1-Math.sqrt(1-n*n)}},{key:"exp",value:function(n){return Math.pow(2,10*(n-1))}},{key:"elastic",value:function(){var n=(arguments.length>0&&void 0!==arguments[0]?arguments[0]:1)*Math.PI;return function(u){return 1-Math.pow(Math.cos(u*Math.PI/2),3)*Math.cos(u*n)}}},{key:"back",value:function(){var n=arguments.length>0&&void 0!==arguments[0]?arguments[0]:1.70158;return function(u){return u*u*((n+1)*u-n)}}},{key:"bounce",value:function(n){if(n<.36363636363636365)return 7.5625*n*n;if(n<.7272727272727273){var u=n-.5454545454545454;return 7.5625*u*u+.75}if(n<.9090909090909091){var t=n-.8181818181818182;return 7.5625*t*t+.9375}var o=n-.9545454545454546;return 7.5625*o*o+.984375}},{key:"bezier",value:function(n,u,t,o){return r(d[2])(n,u,t,o)}},{key:"in",value:function(n){return n}},{key:"out",value:function(n){return function(u){return 1-n(1-u)}}},{key:"inOut",value:function(n){return function(u){return u<.5?n(2*u)/2:1-n(2*(1-u))/2}}}]),u})();m.exports=u},226,[5,6,227]); -__d(function(g,r,i,a,m,e,d){'use strict';var n=4,t=.001,u=1e-7,o=10,f=.1,c='function'==typeof Float32Array;function v(n,t){return 1-3*t+3*n}function s(n,t){return 3*t-6*n}function w(n){return 3*n}function l(n,t,u){return((v(t,u)*n+s(t,u))*n+w(t))*n}function y(n,t,u){return 3*v(t,u)*n*n+2*s(t,u)*n+w(t)}function b(n,t,f,c,v){var s,w,y=0,b=t,h=f;do{(s=l(w=b+(h-b)/2,c,v)-n)>0?h=w:b=w}while(Math.abs(s)>u&&++y=0&&n<=1&&o>=0&&o<=1))throw new Error('bezier x values must be in [0, 1] range');var s=c?new Float32Array(11):new Array(11);if(n!==u||o!==v)for(var w=0;w<11;++w)s[w]=l(w*f,n,o);function A(u){for(var c=0,v=1;10!==v&&s[v]<=u;++v)c+=f;var w=c+(u-s[--v])/(s[v+1]-s[v])*f,l=y(w,n,o);return l>=t?h(u,w,n,o):0===l?w:b(u,c,c+f,n,o)}return function(t){return n===u&&o===v?t:0===t?0:1===t?1:l(A(t),u,v)}}},227,[]); -__d(function(g,r,i,a,m,e,d){'use 
strict';function t(){if("undefined"==typeof Reflect||!Reflect.construct)return!1;if(Reflect.construct.sham)return!1;if("function"==typeof Proxy)return!0;try{return Date.prototype.toString.call(Reflect.construct(Date,[],function(){})),!0}catch(t){return!1}}var n=(function(n){r(d[2])(u,n);var s,o,c=(s=u,o=t(),function(){var t,n=r(d[0])(s);if(o){var c=r(d[0])(this).constructor;t=Reflect.construct(n,arguments,c)}else t=n.apply(this,arguments);return r(d[1])(this,t)});function u(t){var n,s,o,_;return r(d[3])(this,u),(_=c.call(this))._deceleration=null!=(n=t.deceleration)?n:.998,_._velocity=t.velocity,_._useNativeDriver=r(d[4]).shouldUseNativeDriver(t),_.__isInteraction=null!=(s=t.isInteraction)?s:!_._useNativeDriver,_.__iterations=null!=(o=t.iterations)?o:1,_}return r(d[5])(u,[{key:"__getNativeAnimationConfig",value:function(){return{type:'decay',deceleration:this._deceleration,velocity:this._velocity,iterations:this.__iterations}}},{key:"start",value:function(t,n,s,o,c){this.__active=!0,this._lastValue=t,this._fromValue=t,this._onUpdate=n,this.__onEnd=s,this._startTime=Date.now(),this._useNativeDriver?this.__startNativeAnimation(c):this._animationFrame=requestAnimationFrame(this.onUpdate.bind(this))}},{key:"onUpdate",value:function(){var t=Date.now(),n=this._fromValue+this._velocity/(1-this._deceleration)*(1-Math.exp(-(1-this._deceleration)*(t-this._startTime)));this._onUpdate(n),Math.abs(this._lastValue-n)<.1?this.__debouncedOnEnd({finished:!0}):(this._lastValue=n,this.__active&&(this._animationFrame=requestAnimationFrame(this.onUpdate.bind(this))))}},{key:"stop",value:function(){r(d[6])(r(d[0])(u.prototype),"stop",this).call(this),this.__active=!1,g.cancelAnimationFrame(this._animationFrame),this.__debouncedOnEnd({finished:!1})}}]),u})(r(d[7]));m.exports=n},228,[12,9,7,5,204,6,50,224]); -__d(function(g,r,i,a,m,e,d){'use strict';function t(t,n,s){var v=[];r(d[1])(s[0]&&s[0].nativeEvent,'Native driven events only support animated values contained inside `nativeEvent`.'),(function t(n,s){if(n instanceof r(d[0]))n.__makeNative(),v.push({nativeEventPath:s,animatedValueTag:n.__getNativeTag()});else if('object'==typeof n)for(var o in n)t(n[o],s.concat(o))})(s[0].nativeEvent,[]);var o=r(d[2]).findNodeHandle(t);return null!=o&&v.forEach(function(t){r(d[3]).API.addAnimatedEventToView(o,n,t)}),{detach:function(){null!=o&&v.forEach(function(t){r(d[3]).API.removeAnimatedEventFromView(o,n,t.animatedValueTag)})}}}var n=(function(){function n(t,s){r(d[4])(this,n),this._listeners=[],this._argMapping=t,null==s&&(console.warn('Animated.event now requires a second argument for options'),s={}),s.listener&&this.__addListener(s.listener),this._callListeners=this._callListeners.bind(this),this._attachedEvent=null,this.__isNative=r(d[3]).shouldUseNativeDriver(s)}return r(d[5])(n,[{key:"__addListener",value:function(t){this._listeners.push(t)}},{key:"__removeListener",value:function(t){this._listeners=this._listeners.filter(function(n){return n!==t})}},{key:"__attach",value:function(n,s){r(d[1])(this.__isNative,'Only native driven events need to be attached.'),this._attachedEvent=t(n,s,this._argMapping)}},{key:"__detach",value:function(t,n){r(d[1])(this.__isNative,'Only native driven events need to be detached.'),this._attachedEvent&&this._attachedEvent.detach()}},{key:"__getHandler",value:function(){var t=this;return this.__isNative?this._callListeners:function(){for(var n=arguments.length,s=new Array(n),v=0;v>'),n})}}),t}return r(d[6])(p,[{key:"_attachNativeEvents",value:function(){var 
t,n=this,o=null!=(t=this._component)&&t.getScrollableNode?this._component.getScrollableNode():this._component,s=function(t){var s=n.props[t];s instanceof r(d[7]).AnimatedEvent&&s.__isNative&&(s.__attach(o,t),n._eventDetachers.push(function(){return s.__detach(o,t)}))};for(var c in this.props)s(c)}},{key:"_detachNativeEvents",value:function(){this._eventDetachers.forEach(function(t){return t()}),this._eventDetachers=[]}},{key:"_attachProps",value:function(t){var n=this._propsAnimated;this._propsAnimated=new(r(d[8]))(t,this._animatedPropsCallback),n&&(n.__restoreDefaultValues(),n.__detach())}},{key:"render",value:function(){var t=this._propsAnimated.__getValue();return r(d[9]).createElement(n,r(d[10])({},t,{ref:this._setComponentRef,collapsable:!this._propsAnimated.__isNative&&t.collapsable}))}},{key:"UNSAFE_componentWillMount",value:function(){this._attachProps(this.props)}},{key:"componentDidMount",value:function(){this._invokeAnimatedPropsCallbackOnMount&&(this._invokeAnimatedPropsCallbackOnMount=!1,this._animatedPropsCallback()),this._propsAnimated.setNativeView(this._component),this._attachNativeEvents()}},{key:"UNSAFE_componentWillReceiveProps",value:function(t){this._attachProps(t)}},{key:"componentDidUpdate",value:function(t){this._component!==this._prevComponent&&this._propsAnimated.setNativeView(this._component),this._component===this._prevComponent&&t===this.props||(this._detachNativeEvents(),this._attachNativeEvents())}},{key:"componentWillUnmount",value:function(){this._propsAnimated&&this._propsAnimated.__detach(),this._detachNativeEvents()}}]),p})(r(d[9]).Component);return r(d[9]).forwardRef(function(t,n){return r(d[9]).createElement(o,r(d[10])({},t,null==n?null:{forwardedRef:n}))})}},230,[12,9,25,7,5,231,6,229,232,14,17]); -__d(function(g,r,i,a,m,e,d){'use strict';m.exports=function(t){var n=t.getForwardedRef,o=t.setLocalRef;return function(t){var c=n();o(t),'function'==typeof c?c(t):'object'==typeof c&&null!=c&&(c.current=t)}}},231,[]); -__d(function(g,r,i,a,m,e,d){'use strict';function t(t,n){var o=Object.keys(t);if(Object.getOwnPropertySymbols){var s=Object.getOwnPropertySymbols(t);n&&(s=s.filter(function(n){return Object.getOwnPropertyDescriptor(t,n).enumerable})),o.push.apply(o,s)}return o}function n(n){for(var o=1;o1){for(var s=[],u=0;u1?Math.ceil(t.length/o):t.length}return 0},n._keyExtractor=function(t,o){var l=n.props,s=l.keyExtractor,u=l.numColumns;return u>1?(r(d[11])(Array.isArray(t),"FlatList: Encountered internal consistency error, expected each item to consist of an array with 1-%s columns; instead, received a single item.",u),t.map(function(t,n){return s(t,o*u+n)}).join(':')):s(t,o)},n._renderer=function(){var t=n.props,o=t.ListItemComponent,l=t.renderItem,s=t.numColumns,u=t.columnWrapperStyle,c=o?'ListItemComponent':'renderItem',p=function(t){return o?r(d[12]).createElement(o,t):l?l(t):null};return(0,f.default)({},c,function(t){if(s>1){var n=t.item,o=t.index;return r(d[11])(Array.isArray(n),'Expected array of items with numColumns > 1'),r(d[12]).createElement(r(d[13]),{style:r(d[14]).compose(w.row,u)},n.map(function(n,l){var u=p({item:n,index:o*s+l,separators:t.separators});return null!=u?r(d[12]).createElement(r(d[12]).Fragment,{key:l},u):null}))}return 
p(t)})},n._checkProps(n.props),n.props.viewabilityConfigCallbackPairs?n._virtualizedListPairs=n.props.viewabilityConfigCallbackPairs.map(function(t){return{viewabilityConfig:t.viewabilityConfig,onViewableItemsChanged:n._createOnViewableItemsChanged(t.onViewableItemsChanged)}}):n.props.onViewableItemsChanged&&n._virtualizedListPairs.push({viewabilityConfig:n.props.viewabilityConfig,onViewableItemsChanged:n._createOnViewableItemsChanged(n.props.onViewableItemsChanged)}),n}return(0,l.default)(k,[{key:"scrollToEnd",value:function(t){this._listRef&&this._listRef.scrollToEnd(t)}},{key:"scrollToIndex",value:function(t){this._listRef&&this._listRef.scrollToIndex(t)}},{key:"scrollToItem",value:function(t){this._listRef&&this._listRef.scrollToItem(t)}},{key:"scrollToOffset",value:function(t){this._listRef&&this._listRef.scrollToOffset(t)}},{key:"recordInteraction",value:function(){this._listRef&&this._listRef.recordInteraction()}},{key:"flashScrollIndicators",value:function(){this._listRef&&this._listRef.flashScrollIndicators()}},{key:"getScrollResponder",value:function(){if(this._listRef)return this._listRef.getScrollResponder()}},{key:"getNativeScrollRef",value:function(){if(this._listRef){var t=this._listRef.getScrollRef();if(null!=t)return t instanceof p.default?t.getNativeScrollRef():t}}},{key:"getScrollableNode",value:function(){if(this._listRef)return this._listRef.getScrollableNode()}},{key:"setNativeProps",value:function(t){this._listRef&&this._listRef.setNativeProps(t)}}]),(0,l.default)(k,[{key:"componentDidUpdate",value:function(t){r(d[11])(t.numColumns===this.props.numColumns,"Changing numColumns on the fly is not supported. Change the key prop on FlatList when changing the number of columns to force a fresh render of the component."),r(d[11])(t.onViewableItemsChanged===this.props.onViewableItemsChanged,'Changing onViewableItemsChanged on the fly is not supported'),r(d[11])(!r(d[15])(t.viewabilityConfig,this.props.viewabilityConfig),'Changing viewabilityConfig on the fly is not supported'),r(d[11])(t.viewabilityConfigCallbackPairs===this.props.viewabilityConfigCallbackPairs,'Changing viewabilityConfigCallbackPairs on the fly is not supported'),this._checkProps(this.props)}},{key:"_checkProps",value:function(t){var n=t.getItem,o=t.getItemCount,l=t.horizontal,s=t.numColumns,u=t.columnWrapperStyle,c=t.onViewableItemsChanged,f=t.viewabilityConfigCallbackPairs;r(d[11])(!n&&!o,'FlatList does not support custom data formats.'),s>1?r(d[11])(!l,'numColumns does not support horizontal.'):r(d[11])(!u,'columnWrapperStyle not supported for single column lists'),r(d[11])(!(c&&f),"FlatList does not support setting both onViewableItemsChanged and viewabilityConfigCallbackPairs.")}},{key:"_pushMultiColumnViewable",value:function(t,n){var o=this.props,l=o.numColumns,s=o.keyExtractor;n.item.forEach(function(o,u){r(d[11])(null!=n.index,'Missing index!');var c=n.index*l+u;t.push(v(v({},n),{},{item:o,key:s(o,c),index:c}))})}},{key:"_createOnViewableItemsChanged",value:function(t){var n=this;return function(o){var l=n.props.numColumns;if(t)if(l>1){var s=[],u=[];o.viewableItems.forEach(function(t){return n._pushMultiColumnViewable(u,t)}),o.changed.forEach(function(t){return n._pushMultiColumnViewable(s,t)}),t({viewableItems:u,changed:s})}else t(o)}}},{key:"render",value:function(){var o=this.props,l=(o.numColumns,o.columnWrapperStyle,(0,n.default)(o,["numColumns","columnWrapperStyle"]));return 
r(d[12]).createElement(r(d[10]),(0,t.default)({},l,{getItem:this._getItem,getItemCount:this._getItemCount,keyExtractor:this._keyExtractor,ref:this._captureRef,viewabilityConfigCallbackPairs:this._virtualizedListPairs},this._renderer()))}}]),k})(r(d[12]).PureComponent);b.defaultProps=C;var w=r(d[14]).create({row:{flexDirection:'row'}});m.exports=b},236,[1,17,126,5,6,7,9,12,19,237,257,25,14,128,191,92]); -__d(function(g,r,i,a,m,e,d){'use strict';var t,n,o=r(d[0])(r(d[1])),l=r(d[0])(r(d[2])),s=r(d[0])(r(d[3])),c=r(d[0])(r(d[4])),p=r(d[0])(r(d[5])),u=r(d[0])(r(d[6])),h=r(d[0])(r(d[7])),f=r(d[0])(r(d[8])),y=r(d[0])(r(d[9])),R=r(d[0])(r(d[10]));r(d[0])(r(d[11])),r(d[0])(r(d[12]));function v(){if("undefined"==typeof Reflect||!Reflect.construct)return!1;if(Reflect.construct.sham)return!1;if("function"==typeof Proxy)return!0;try{return Date.prototype.toString.call(Reflect.construct(Date,[],function(){})),!0}catch(t){return!1}}function _(t,n){var o=Object.keys(t);if(Object.getOwnPropertySymbols){var l=Object.getOwnPropertySymbols(t);n&&(l=l.filter(function(n){return Object.getOwnPropertyDescriptor(t,n).enumerable})),o.push.apply(o,l)}return o}function S(t){for(var n=1;n0&&(this._scrollAnimatedValueAttachment=r(d[16]).attachNativeEvent(this._scrollViewRef,'onScroll',[{nativeEvent:{contentOffset:{y:this._scrollAnimatedValue}}}]))}},{key:"_setStickyHeaderRef",value:function(t,n){n?this._stickyHeaderRefs.set(t,n):this._stickyHeaderRefs.delete(t)}},{key:"_onStickyHeaderLayout",value:function(t,n,o){var l=this.props.stickyHeaderIndices;if(l){var s=r(d[15]).Children.toArray(this.props.children);if(o===this._getKeyForIndex(t,s)){var c=n.nativeEvent.layout.y;this._headerLayoutYs.set(o,c);var p=l[l.indexOf(t)-1];if(null!=p){var u=this._stickyHeaderRefs.get(this._getKeyForIndex(p,s));u&&u.setNextHeaderY&&u.setNextHeaderY(c)}}}}},{key:"render",value:function(){var l,s,c=this;l=t,s=n,r(d[17])(void 0!==l,'ScrollViewClass must not be undefined'),r(d[17])(void 0!==s,'ScrollContentContainerViewClass must not be undefined');var p=[!0===this.props.horizontal&&b.contentContainerHorizontal,this.props.contentContainerStyle],u={};this.props.onContentSizeChange&&(u={onLayout:this._handleContentOnLayout});var h=this.props.stickyHeaderIndices,f=this.props.children;if(null!=h&&h.length>0){var y=r(d[15]).Children.toArray(this.props.children);f=y.map(function(t,n){var o=t?h.indexOf(n):-1;if(o>-1){var l=t.key,s=h[o+1],p=c.props.StickyHeaderComponent||r(d[18]);return r(d[15]).createElement(p,{key:l,ref:function(t){return c._setStickyHeaderRef(l,t)},nextHeaderLayoutY:c._headerLayoutYs.get(c._getKeyForIndex(s,y)),onLayout:function(t){return c._onStickyHeaderLayout(n,t,l)},scrollAnimatedValue:c._scrollAnimatedValue,inverted:c.props.invertStickyHeaders,scrollViewHeight:c.state.layoutHeight},t)}return t})}f=r(d[15]).createElement(H.Provider,{value:!0===this.props.horizontal?k:V},f);var R=Array.isArray(h)&&h.length>0,v=r(d[15]).createElement(s,(0,o.default)({},u,{ref:this._setInnerViewRef,style:p,removeClippedSubviews:this.props.removeClippedSubviews,collapsable:!1}),f),_=void 0!==this.props.alwaysBounceHorizontal?this.props.alwaysBounceHorizontal:this.props.horizontal,w=void 
0!==this.props.alwaysBounceVertical?this.props.alwaysBounceVertical:!this.props.horizontal,E=!!this.props.DEPRECATED_sendUpdatedChildFrames,A=!0===this.props.horizontal?b.baseHorizontal:b.baseVertical,C=S(S({},this.props),{},{alwaysBounceHorizontal:_,alwaysBounceVertical:w,style:[A,this.props.style],onContentSizeChange:null,onLayout:this._handleLayout,onMomentumScrollBegin:this._scrollResponder.scrollResponderHandleMomentumScrollBegin,onMomentumScrollEnd:this._scrollResponder.scrollResponderHandleMomentumScrollEnd,onResponderGrant:this._scrollResponder.scrollResponderHandleResponderGrant,onResponderReject:this._scrollResponder.scrollResponderHandleResponderReject,onResponderRelease:this._scrollResponder.scrollResponderHandleResponderRelease,onResponderTerminationRequest:this._scrollResponder.scrollResponderHandleTerminationRequest,onScrollBeginDrag:this._scrollResponder.scrollResponderHandleScrollBeginDrag,onScrollEndDrag:this._scrollResponder.scrollResponderHandleScrollEndDrag,onScrollShouldSetResponder:this._scrollResponder.scrollResponderHandleScrollShouldSetResponder,onStartShouldSetResponder:this._scrollResponder.scrollResponderHandleStartShouldSetResponder,onStartShouldSetResponderCapture:this._scrollResponder.scrollResponderHandleStartShouldSetResponderCapture,onTouchEnd:this._scrollResponder.scrollResponderHandleTouchEnd,onTouchMove:this._scrollResponder.scrollResponderHandleTouchMove,onTouchStart:this._scrollResponder.scrollResponderHandleTouchStart,onTouchCancel:this._scrollResponder.scrollResponderHandleTouchCancel,onScroll:this._handleScroll,scrollBarThumbImage:r(d[19])(this.props.scrollBarThumbImage),scrollEventThrottle:R?1:this.props.scrollEventThrottle,sendMomentumEvents:!(!this.props.onMomentumScrollBegin&&!this.props.onMomentumScrollEnd),DEPRECATED_sendUpdatedChildFrames:E,snapToStart:!1!==this.props.snapToStart,snapToEnd:!1!==this.props.snapToEnd,pagingEnabled:!0===this.props.pagingEnabled&&null==this.props.snapToInterval&&null==this.props.snapToOffsets}),x=this.props.decelerationRate;null!=x&&(C.decelerationRate=r(d[20])(x));var O=this.props.refreshControl;return O?r(d[15]).createElement(l,(0,o.default)({},C,{ref:this._setScrollViewRef}),r(d[21]).isTV?null:O,v):r(d[15]).createElement(l,(0,o.default)({},C,{ref:this._setScrollViewRef}),v)}}]),E})(r(d[15]).Component);E.Context=H;var b=r(d[22]).create({baseVertical:{flexGrow:1,flexShrink:1,flexDirection:'column',overflow:'scroll'},baseHorizontal:{flexGrow:1,flexShrink:1,flexDirection:'row',overflow:'scroll'},contentContainerHorizontal:{flexDirection:'row'}});m.exports=E},237,[1,17,5,6,11,7,9,12,19,238,244,245,246,131,247,14,214,25,255,119,256,82,191]); -__d(function(g,r,i,a,m,e,d){'use strict';Object.defineProperty(e,"__esModule",{value:!0}),e.default=void 0;var l,t=r(d[0])(r(d[1]));g.RN$Bridgeless?(r(d[2])('RCTScrollView',t.default),l='RCTScrollView'):l=r(d[3])('RCTScrollView');var o=l;e.default=o},238,[1,239,240,58]); -__d(function(g,r,i,a,m,e,d){'use strict';var 
o={uiViewClassName:'RCTScrollView',bubblingEventTypes:{},directEventTypes:{topScrollToTop:{registrationName:'onScrollToTop'}},validAttributes:{alwaysBounceHorizontal:!0,alwaysBounceVertical:!0,automaticallyAdjustContentInsets:!0,bounces:!0,bouncesZoom:!0,canCancelContentTouches:!0,centerContent:!0,contentInset:{diff:r(d[0])},contentOffset:{diff:r(d[0])},contentInsetAdjustmentBehavior:!0,decelerationRate:!0,directionalLockEnabled:!0,disableIntervalMomentum:!0,endFillColor:{process:r(d[1])},fadingEdgeLength:!0,indicatorStyle:!0,keyboardDismissMode:!0,maintainVisibleContentPosition:!0,maximumZoomScale:!0,minimumZoomScale:!0,nestedScrollEnabled:!0,onMomentumScrollBegin:!0,onMomentumScrollEnd:!0,onScroll:!0,onScrollBeginDrag:!0,onScrollEndDrag:!0,onScrollToTop:!0,overScrollMode:!0,pagingEnabled:!0,persistentScrollbar:!0,pinchGestureEnabled:!0,scrollEnabled:!0,scrollEventThrottle:!0,scrollIndicatorInsets:{diff:r(d[0])},scrollPerfTag:!0,scrollToOverflowEnabled:!0,scrollsToTop:!0,sendMomentumEvents:!0,showsHorizontalScrollIndicator:!0,showsVerticalScrollIndicator:!0,snapToAlignment:!0,snapToEnd:!0,snapToInterval:!0,snapToOffsets:!0,snapToStart:!0,zoomScale:!0,DEPRECATED_sendUpdatedChildFrames:!0}};m.exports=o},239,[116,114]); -__d(function(g,r,i,a,m,e,d){'use strict';var t=r(d[0])(r(d[1])),n=r(d[0])(r(d[2]));function c(t,n){var c=Object.keys(t);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(t);n&&(o=o.filter(function(n){return Object.getOwnPropertyDescriptor(t,n).enumerable})),c.push.apply(c,o)}return c}function o(n){for(var o=1;o1&&(u[o]=c)}else u[o]=n}for(var s in t)n.includes(s)||(o?t.hasOwnProperty(s)&&c(t[s],o[s],s):u[s]={});return u}var o=function(t,n){if(!g.RN$Bridgeless){var o=r(d[2])(t);['validAttributes','bubblingEventTypes','directEventTypes'].forEach(function(u){var c=Object.keys(f(o[u],n[u]));c.length&&console.error(t+" generated view config for "+u+" does not match native, missing: "+c.join(' '))})}};e.default=o},241,[1,242,95]); -__d(function(g,r,i,a,m,e,d){'use strict';var t=r(d[0])(r(d[1])),o=r(d[0])(r(d[2]));function s(t,o){var s=Object.keys(t);if(Object.getOwnPropertySymbols){var n=Object.getOwnPropertySymbols(t);o&&(n=n.filter(function(o){return Object.getOwnPropertyDescriptor(t,o).enumerable})),s.push.apply(s,n)}return s}function n(o){for(var n=1;n0){f.push(L),v.push(0),f.push(L+1),v.push(1);var x=(y||0)-c-s;x>L&&(f.push(x,x+1),v.push(x-L,x-L))}}}else{f.push(h),v.push(0);var H=(y||0)-c;H>=h?(f.push(H,H+1),v.push(H-h,H-h)):(f.push(h+1),v.push(1))}var Y=this.props.scrollAnimatedValue.interpolate({inputRange:f,outputRange:v}),R=r(d[6]).Children.only(this.props.children);return r(d[6]).createElement(n,{collapsable:!1,onLayout:this._onLayout,style:[R.props.style,u.header,{transform:[{translateY:Y}]}]},r(d[6]).cloneElement(R,{style:u.fill,onLayout:void 0}))}}]),c})(r(d[6]).Component),u=r(d[8]).create({header:{zIndex:10},fill:{flex:1}});m.exports=o},255,[12,9,214,128,7,5,14,6,191]); -__d(function(g,r,i,a,m,e,d){'use strict';m.exports=function(t){return'normal'===t?.998:'fast'===t?.99:t}},256,[]); -__d(function(g,r,i,a,m,e,d){'use strict';function t(t,n){var o;if("undefined"==typeof Symbol||null==t[Symbol.iterator]){if(Array.isArray(t)||(o=s(t))||n&&t&&"number"==typeof t.length){o&&(t=o);var l=0;return function(){return l>=t.length?{done:!0}:{done:!1,value:t[l++]}}}throw new TypeError("Invalid attempt to iterate non-iterable instance.\nIn order to be iterable, non-array objects must have a [Symbol.iterator]() 
method.")}return(o=t[Symbol.iterator]()).next.bind(o)}function s(t,s){if(t){if("string"==typeof t)return n(t,s);var o=Object.prototype.toString.call(t).slice(8,-1);return"Object"===o&&t.constructor&&(o=t.constructor.name),"Map"===o||"Set"===o?Array.from(t):"Arguments"===o||/^(?:Ui|I)nt(?:8|16|32)(?:Clamped)?Array$/.test(o)?n(t,s):void 0}}function n(t,s){(null==s||s>t.length)&&(s=t.length);for(var n=0,o=new Array(s);n0&&s>0&&null!=h.props.initialScrollIndex&&h.props.initialScrollIndex>0&&!h._hasDoneInitialScroll&&(h.scrollToIndex({animated:!1,index:h.props.initialScrollIndex}),h._hasDoneInitialScroll=!0),h.props.onContentSizeChange&&h.props.onContentSizeChange(t,s),h._scrollMetrics.contentLength=h._selectLength({height:s,width:t}),h._scheduleCellsToRenderUpdate(),h._maybeCallOnEndReached()},h._convertParentScrollMetrics=function(t){var s=t.offset-h._offsetFromParentVirtualizedList,n=t.visibleLength,o=s-h._scrollMetrics.offset;return{visibleLength:n,contentLength:h._scrollMetrics.contentLength,offset:s,dOffset:o}},h._onScroll=function(t){h._nestedChildLists.forEach(function(s){s.ref&&s.ref._onScroll(t)}),h.props.onScroll&&h.props.onScroll(t);var s=t.timeStamp,n=h._selectLength(t.nativeEvent.layoutMeasurement),o=h._selectLength(t.nativeEvent.contentSize),l=h._selectOffset(t.nativeEvent.contentOffset),c=l-h._scrollMetrics.offset;if(h._isNestedWithSameOrientation()){if(0===h._scrollMetrics.contentLength)return;var u=h._convertParentScrollMetrics({visibleLength:n,offset:l});n=u.visibleLength,o=u.contentLength,l=u.offset,c=u.dOffset}var p=h._scrollMetrics.timestamp?Math.max(1,s-h._scrollMetrics.timestamp):1,f=c/p;p>500&&h._scrollMetrics.dt>500&&o>5*n&&!h._hasWarned.perf&&(r(d[14])("VirtualizedList: You have a large list that is slow to update - make sure your renderItem function renders components that follow React performance best practices like PureComponent, shouldComponentUpdate, etc.",{dt:p,prevDt:h._scrollMetrics.dt,contentLength:o}),h._hasWarned.perf=!0),h._scrollMetrics={contentLength:o,dt:p,dOffset:c,offset:l,timestamp:s,velocity:f,visibleLength:n},h._updateViewableItems(h.props.data),h.props&&(h._maybeCallOnEndReached(),0!==f&&h._fillRateHelper.activate(),h._computeBlankness(),h._scheduleCellsToRenderUpdate())},h._onScrollBeginDrag=function(t){h._nestedChildLists.forEach(function(s){s.ref&&s.ref._onScrollBeginDrag(t)}),h._viewabilityTuples.forEach(function(t){t.viewabilityHelper.recordInteraction()}),h._hasInteracted=!0,h.props.onScrollBeginDrag&&h.props.onScrollBeginDrag(t)},h._onScrollEndDrag=function(t){var s=t.nativeEvent.velocity;s&&(h._scrollMetrics.velocity=h._selectOffset(s)),h._computeBlankness(),h.props.onScrollEndDrag&&h.props.onScrollEndDrag(t)},h._onMomentumScrollEnd=function(t){h._scrollMetrics.velocity=0,h._computeBlankness(),h.props.onMomentumScrollEnd&&h.props.onMomentumScrollEnd(t)},h._updateCellsToRender=function(){var s=h.props,n=s.data,o=s.getItemCount,l=s.onEndReachedThreshold,c=h._isVirtualizationDisabled();h._updateViewableItems(n),n&&h.setState(function(s){var u;if(c){var p=h._scrollMetrics,f=p.contentLength,_=p.offset,v=p.visibleLength,y=f-v-_0)for(var C=u.first,L=u.last,b=C;b<=L;b++){var S=h._indicesToKeys.get(b),I=S&&h._cellKeysToChildListKeys.get(S);if(I){for(var M,R=!1,x=t(I);!(M=x()).done;){var w=M.value,k=h._nestedChildLists.get(w);if(k&&k.ref&&k.ref.hasMore()){R=!0;break}}if(R){u.last=b;break}}}return u})},h._createViewToken=function(t,s){var 
n=h.props,o=n.data,l=n.getItem,c=n.keyExtractor,u=l(o,t);return{index:t,item:u,key:c(u,t),isViewable:s}},h._getFrameMetricsApprox=function(t){var s=h._getFrameMetrics(t);if(s&&s.index===t)return s;var n=h.props.getItemLayout;return r(d[5])(!n,'Should not have to estimate frames when a measurement metrics function is provided'),{length:h._averageCellLength,offset:h._averageCellLength*t}},h._getFrameMetrics=function(t){var s=h.props,n=s.data,o=s.getItem,l=s.getItemCount,c=s.getItemLayout,u=s.keyExtractor;r(d[5])(l(n)>t,'Tried to get frame for out of range index '+t);var p=o(n,t),f=p&&h._frames[u(p,t)];return f&&f.index===t||c&&(f=c(n,t)),f},r(d[5])(!s.onScroll||!s.onScroll.__isNative,"Components based on VirtualizedList must be wrapped with Animated.createAnimatedComponent to support native onScroll events with useNativeDriver"),r(d[5])(s.windowSize>0,'VirtualizedList: The windowSize prop must be present and set to a value greater than 0.'),h._fillRateHelper=new(r(d[16]))(h._getFrameMetrics),h._updateCellsToRenderBatcher=new(r(d[17]))(h._updateCellsToRender,h.props.updateCellsBatchingPeriod),h.props.viewabilityConfigCallbackPairs?h._viewabilityTuples=h.props.viewabilityConfigCallbackPairs.map(function(t){return{viewabilityHelper:new(r(d[18]))(t.viewabilityConfig),onViewableItemsChanged:t.onViewableItemsChanged}}):h.props.onViewableItemsChanged&&h._viewabilityTuples.push({viewabilityHelper:new(r(d[18]))(h.props.viewabilityConfig),onViewableItemsChanged:h.props.onViewableItemsChanged});var u={first:h.props.initialScrollIndex||0,last:Math.min(h.props.getItemCount(h.props.data),(h.props.initialScrollIndex||0)+h.props.initialNumToRender)-1};if(h._isNestedWithSameOrientation()){var p=h.context.virtualizedList.getNestedChildState(h.props.listKey||h._getCellKey());p&&(u=p,h.state=p,h._frames=p.frames)}return h.state=u,h}return r(d[4])(o,[{key:"scrollToEnd",value:function(t){var s=!t||t.animated,n=this.props.getItemCount(this.props.data)-1,o=this._getFrameMetricsApprox(n),l=Math.max(0,o.offset+o.length+this._footerLength-this._scrollMetrics.visibleLength);null!=this._scrollRef&&this._scrollRef.scrollTo(this.props.horizontal?{x:l,animated:s}:{y:l,animated:s})}},{key:"scrollToIndex",value:function(t){var s=this.props,n=s.data,o=s.horizontal,l=s.getItemCount,c=s.getItemLayout,h=s.onScrollToIndexFailed,u=t.animated,p=t.index,f=t.viewOffset,_=t.viewPosition;if(r(d[5])(p>=0&&pthis._highestMeasuredFrameIndex)return r(d[5])(!!h,"scrollToIndex should be used in conjunction with getItemLayout or onScrollToIndexFailed, otherwise there is no way to know the location of offscreen indices or handle failures."),void h({averageItemLength:this._averageCellLength,highestMeasuredFrameIndex:this._highestMeasuredFrameIndex,index:p});var v=this._getFrameMetricsApprox(p),y=Math.max(0,v.offset-(_||0)*(this._scrollMetrics.visibleLength-v.length))-(f||0);null!=this._scrollRef&&this._scrollRef.scrollTo(o?{x:y,animated:u}:{y:y,animated:u})}},{key:"scrollToItem",value:function(t){for(var s=t.item,n=this.props,o=n.data,c=n.getItem,h=(0,n.getItemCount)(o),u=0;u0){u=!1,p='';var x=_?'width':'height',w=this.props.initialScrollIndex?-1:this.props.initialNumToRender-1,k=this.state,E=k.first,O=k.last;this._pushCells(b,I,S,0,w,L);var T=Math.max(w+1,E);if(!C&&E>w+1){var z=!1;if(S.size>0)for(var P=c?1:0,F=T-1;F>w;F--)if(S.has(F+P)){var 
K=this._getFrameMetricsApprox(w),N=this._getFrameMetricsApprox(F),V=N.offset-K.offset-(this.props.initialScrollIndex?0:K.length);b.push(r(d[9]).createElement(r(d[10]),{key:"$sticky_lead",style:r(d[0])({},x,V)})),this._pushCells(b,I,S,F,F,L);var A=this._getFrameMetricsApprox(E).offset-(N.offset+N.length);b.push(r(d[9]).createElement(r(d[10]),{key:"$sticky_trail",style:r(d[0])({},x,A)})),z=!0;break}if(!z){var D=this._getFrameMetricsApprox(w),B=this._getFrameMetricsApprox(E).offset-(D.offset+D.length);b.push(r(d[9]).createElement(r(d[10]),{key:"$lead_spacer",style:r(d[0])({},x,B)}))}}if(this._pushCells(b,I,S,T,O,L),!this._hasWarned.keys&&u&&(console.warn("VirtualizedList: missing keys for items, make sure to specify a key or id property on each item or provide a custom keyExtractor.",p),this._hasWarned.keys=!0),!C&&Of&&(this._sentEndForContentLength=0)}},{key:"_scheduleCellsToRenderUpdate",value:function(){var t=this.state,s=t.first,n=t.last,o=this._scrollMetrics,l=o.offset,c=o.visibleLength,h=o.velocity,u=this.props.getItemCount(this.props.data),p=!1,f=this.props.onEndReachedThreshold*c/2;if(s>0){var _=l-this._getFrameMetricsApprox(s).offset;p=p||_<0||h<-2&&_2&&v=t[v]&&(f[v]=o,l++,v===t.length-1))return r(d[0])(l===t.length,'bad offsets input, should be in increasing order: %s',JSON.stringify(t)),f;return f}function n(t,n){return n.last-n.first+1-Math.max(0,1+Math.min(n.last,t.last)-Math.max(n.first,t.first))}var s={computeWindowedRenderLimits:function(s,f,l,o){var u=s.data,h=s.getItemCount,v=s.maxToRenderPerBatch,c=s.windowSize,x=h(u);if(0===x)return f;var M=o.offset,w=o.velocity,b=o.visibleLength,p=Math.max(0,M),C=p+b,O=(c-1)*b,y=w>1?'after':w<-1?'before':'none',L=Math.max(0,p-.5*O),R=Math.max(0,C+.5*O);if(l(x-1).offset=T);){var z=k>=v,E=J<=f.first||J>f.last,F=J>I&&(!z||!E),P=N>=f.last||N=J&&J>=0&&N=I&&N<=T&&J<=_.first&&N>=_.last))throw new Error('Bad window calculation '+JSON.stringify({first:J,last:N,itemCount:x,overscanFirst:I,overscanLast:T,visible:_}));return{first:J,last:N}},elementsThatOverlapOffsets:t,newRangeCount:n};m.exports=s},262,[25,27]); -__d(function(g,r,i,a,m,e,d){'use strict';function t(t,n){var s=Object.keys(t);if(Object.getOwnPropertySymbols){var l=Object.getOwnPropertySymbols(t);n&&(l=l.filter(function(n){return Object.getOwnPropertyDescriptor(t,n).enumerable})),s.push.apply(s,l)}return s}function n(n){for(var s=1;sMath.random(),this._resetData()}return r(d[2])(t,null,[{key:"addListener",value:function(t){return r(d[3])(null!==_,'Call `FillRateHelper.setSampleRate` before `addListener`.'),l.push(t),{remove:function(){l=l.filter(function(n){return t!==n})}}}},{key:"setSampleRate",value:function(t){_=t}},{key:"setMinSampleCount",value:function(t){o=t}}]),r(d[2])(t,[{key:"activate",value:function(){this._enabled&&null==this._samplesStartTime&&(this._samplesStartTime=r(d[4])())}},{key:"deactivateAndFlush",value:function(){if(this._enabled){var t=this._samplesStartTime;if(null!=t)if(this._info.sample_count0&&(c=Math.min(h,Math.max(0,p.offset-o)));for(var b=0,k=n.last,v=this._getFrameMetrics(k);k>=n.first&&(!v||!v.inLayout);)v=this._getFrameMetrics(k),k--;if(v&&k0?(this._anyBlankStartTime=f,this._info.any_blank_speed_sum+=u,this._info.any_blank_count++,this._info.pixels_blank+=M,O>.5&&(this._mostlyBlankStartTime=f,this._info.mostly_blank_count++)):(u<.01||Math.abs(l)<1)&&this.deactivateAndFlush(),O}},{key:"enabled",value:function(){return this._enabled}},{key:"_resetData",value:function(){this._anyBlankStartTime=null,this._info=new 
s,this._mostlyBlankStartTime=null,this._samplesStartTime=null}}]),t})();m.exports=h},263,[19,5,6,96,144]); -__d(function(g,r,i,a,m,e,d){'use strict';var t=(function(){function t(n,l){r(d[0])(this,t),this._delay=l,this._callback=n}return r(d[1])(t,[{key:"dispose",value:function(){var t=arguments.length>0&&void 0!==arguments[0]?arguments[0]:{abort:!1};this._taskHandle&&(this._taskHandle.cancel(),t.abort||this._callback(),this._taskHandle=null)}},{key:"schedule",value:function(){var t=this;if(!this._taskHandle){var n=setTimeout(function(){t._taskHandle=r(d[2]).runAfterInteractions(function(){t._taskHandle=null,t._callback()})},this._delay);this._taskHandle={cancel:function(){return clearTimeout(n)}}}}}]),t})();m.exports=t},264,[5,6,209]); -__d(function(g,r,i,a,m,e,d){'use strict';function t(t,n){var o=Object.keys(t);if(Object.getOwnPropertySymbols){var s=Object.getOwnPropertySymbols(t);n&&(s=s.filter(function(n){return Object.getOwnPropertyDescriptor(t,n).enumerable})),o.push.apply(o,s)}return o}function n(n){for(var o=1;o=t.length?{done:!0}:{done:!1,value:t[c++]}}}throw new TypeError("Invalid attempt to iterate non-iterable instance.\nIn order to be iterable, non-array objects must have a [Symbol.iterator]() method.")}return(o=t[Symbol.iterator]()).next.bind(o)}function s(t,n){if(t){if("string"==typeof t)return c(t,n);var o=Object.prototype.toString.call(t).slice(8,-1);return"Object"===o&&t.constructor&&(o=t.constructor.name),"Map"===o||"Set"===o?Array.from(t):"Arguments"===o||/^(?:Ui|I)nt(?:8|16|32)(?:Clamped)?Array$/.test(o)?c(t,n):void 0}}function c(t,n){(null==n||n>t.length)&&(n=t.length);for(var o=0,s=new Array(n);o0&&void 0!==arguments[0]?arguments[0]:{viewAreaCoveragePercentThreshold:0};r(d[1])(this,t),this._hasInteracted=!1,this._timers=new Set,this._viewableIndices=[],this._viewableItems=new Map,this._config=n}return r(d[2])(t,[{key:"dispose",value:function(){this._timers.forEach(clearTimeout)}},{key:"computeViewableItems",value:function(t,n,o,s,c){var l=this._config,f=l.itemVisiblePercentThreshold,h=l.viewAreaCoveragePercentThreshold,v=null!=h,b=v?h:f;r(d[3])(null!=b&&null!=f!=(null!=h),'Must set exactly one of itemVisiblePercentThreshold or viewAreaCoveragePercentThreshold');var y=[];if(0===t)return y;var p=-1,w=c||{first:0,last:t-1},_=w.first,I=w.last;if(I>=t)return console.warn('Invalid render range computing viewability '+JSON.stringify({renderRange:c,itemCount:t})),[];for(var O=_;O<=I;O++){var j=s(O);if(j){var P=j.offset-n,S=P+j.length;if(P0)p=O,u(v,b,P,S,o,j.length)&&y.push(O);else if(p>=0)break}}return y}},{key:"onUpdate",value:function(t,n,o,s,c,l,u){var f=this;if((!this._config.waitForInteraction||this._hasInteracted)&&0!==t&&s(0)){var h=[];if(t&&(h=this.computeViewableItems(t,n,o,s,u)),this._viewableIndices.length!==h.length||!this._viewableIndices.every(function(t,n){return t===h[n]}))if(this._viewableIndices=h,this._config.minimumViewTime){var v=setTimeout(function(){f._timers.delete(v),f._onUpdateSync(h,l,c)},this._config.minimumViewTime);this._timers.add(v)}else this._onUpdateSync(h,l,c)}}},{key:"resetViewableIndices",value:function(){this._viewableIndices=[]}},{key:"recordInteraction",value:function(){this._hasInteracted=!0}},{key:"_onUpdateSync",value:function(t,s,c){var l=this;t=t.filter(function(t){return l._viewableIndices.includes(t)});for(var u,f=this._viewableItems,h=new Map(t.map(function(t){var n=c(t,!0);return[n.key,n]})),v=[],b=o(h);!(u=b()).done;){var y=u.value,p=r(d[4])(y,2),w=p[0],_=p[1];f.has(w)||v.push(_)}for(var I,O=o(f);!(I=O()).done;){var 
j=I.value,P=r(d[4])(j,2),S=P[0],A=P[1];h.has(S)||v.push(n(n({},A),{},{isViewable:!1}))}v.length>0&&(this._viewableItems=h,s({viewableItems:Array.from(h.values()),changed:v,viewabilityConfig:this._config}))}}]),t})();function u(t,n,o,s,c,l){if(h(o,s,c))return!0;var u=f(o,s,c);return 100*(t?u/c:u/l)>=n}function f(t,n,o){var s=Math.min(n,o)-Math.max(t,0);return Math.max(0,s)}function h(t,n,o){return t>=0&&n<=o&&n>t}m.exports=l},265,[19,5,6,25,27]); -__d(function(g,r,i,a,m,e,d){'use strict';m.exports=r(d[0])(r(d[1]))},266,[230,267]); -__d(function(g,r,i,a,m,e,d){'use strict';var t=r(d[0])(r(d[1])),n=r(d[0])(r(d[2])),o=r(d[0])(r(d[3])),u=r(d[0])(r(d[4]));var c=function(n,o){var u,c,l=r(d[5])(n.source)||{uri:void 0,width:void 0,height:void 0};if(Array.isArray(l))c=r(d[6])([s.base,n.style])||{},u=l;else{var h=l.width,f=l.height,p=l.uri;c=r(d[6])([{width:h,height:f},s.base,n.style])||{},u=[l],''===p&&console.warn('source.uri should not be an empty string')}var w=n.resizeMode||c.resizeMode||'cover',v=c.tintColor;if(null!=n.src&&console.warn('The component requires a `source` property rather than `src`.'),null!=n.children)throw new Error('The component cannot contain children. If you want to render content on top of the image, consider using the component or absolute positioning.');return r(d[7]).createElement(r(d[8]),(0,t.default)({},n,{ref:o,style:c,resizeMode:w,tintColor:v,source:u}))};(c=r(d[7]).forwardRef(c)).displayName='Image',c.getSize=function(t,n,c){u.default.getSize(t).then(function(t){var u=(0,o.default)(t,2),c=u[0],s=u[1];return n(c,s)}).catch(c||function(){console.warn('Failed to get size for image '+t)})},c.getSizeWithHeaders=function(t,n,o,c){return u.default.getSizeWithHeaders(t,n).then(function(t){o(t.width,t.height)}).catch(c||function(){console.warn('Failed to get size for image: '+t)})},c.prefetch=function(t){return u.default.prefetchImage(t)},c.queryCache=function(t){return n.default.async(function(o){for(;;)switch(o.prev=o.next){case 0:return o.next=2,n.default.awrap(u.default.queryCache(t));case 2:return o.abrupt("return",o.sent);case 3:case"end":return o.stop()}},null,null,null,Promise)},c.resolveAssetSource=r(d[5]),c.propTypes=r(d[9]);var s=r(d[10]).create({base:{overflow:'hidden'}});m.exports=c},267,[1,17,3,27,268,119,93,14,269,270,191]); -__d(function(g,r,i,a,m,e,d){'use strict';Object.defineProperty(e,"__esModule",{value:!0}),e.default=void 0;var t=r(d[0])(r(d[1])).getEnforcing('ImageLoader');e.default=t},268,[13,24]); -__d(function(g,r,i,a,m,e,d){'use strict';var s,t=r(d[0])(r(d[1]));s=g.RN$Bridgeless?(0,t.default)('RCTImageView'):r(d[2])('RCTImageView'),m.exports=s},269,[1,57,58]); -__d(function(g,r,i,a,m,e,d){'use strict';m.exports={style:r(d[0])(r(d[1])),source:r(d[2]),defaultSource:r(d[3]).oneOfType([r(d[3]).shape({uri:r(d[3]).string,width:r(d[3]).number,height:r(d[3]).number,scale:r(d[3]).number}),r(d[3]).number]),accessible:r(d[3]).bool,accessibilityLabel:r(d[3]).node,blurRadius:r(d[3]).number,capInsets:r(d[4]),resizeMethod:r(d[3]).oneOf(['auto','resize','scale']),resizeMode:r(d[3]).oneOf(['cover','contain','stretch','repeat','center']),testID:r(d[3]).string,onLayout:r(d[3]).func,onLoadStart:r(d[3]).func,onProgress:r(d[3]).func,onError:r(d[3]).func,onPartialLoad:r(d[3]).func,onLoad:r(d[3]).func,onLoadEnd:r(d[3]).func}},270,[271,110,273,101,274]); -__d(function(g,r,i,a,m,e,d){'use strict';m.exports=function(n){var t=r(d[0])(n);return function(n,o,c,u){var f=n;n[o]&&((f={})[o]=r(d[1])(n[o]));for(var v=arguments.length,p=new 
Array(v>4?v-4:0),s=4;s5?O-5:0),v=5;v4?s-4:0),f=4;f0&&this.props.stickySectionHeadersEnabled&&(c+=this._listRef._getFrameMetricsApprox(o-t.itemIndex).length);var p=n(n({},t),{},{viewOffset:c,index:o});this._listRef.scrollToIndex(p)}},{key:"getListRef",value:function(){return this._listRef}}]),r(d[4])(c,[{key:"UNSAFE_componentWillReceiveProps",value:function(t){this.setState(this._computeState(t))}},{key:"_computeState",value:function(t){var o=this,s=t.ListHeaderComponent?1:0,c=[],p=t.sections?t.sections.reduce(function(n,o){return c.push(n+s),n+t.getItemCount(o.data)+2},0):0;t.SectionSeparatorComponent,t.renderItem,t.renderSectionFooter,t.renderSectionHeader,t.sections,t.stickySectionHeadersEnabled;return{childProps:n(n({},r(d[9])(t,["SectionSeparatorComponent","renderItem","renderSectionFooter","renderSectionHeader","sections","stickySectionHeadersEnabled"])),{},{renderItem:this._renderItem,ItemSeparatorComponent:void 0,data:t.sections,getItemCount:function(){return p},getItem:function(n,s){return o._getItem(t,n,s)},keyExtractor:this._keyExtractor,onViewableItemsChanged:t.onViewableItemsChanged?this._onViewableItemsChanged:void 0,stickyHeaderIndices:t.stickySectionHeadersEnabled?c:void 0})}}},{key:"render",value:function(){return r(d[8]).createElement(r(d[10]),r(d[11])({},this.state.childProps,{ref:this._captureRef}))}},{key:"_subExtractor",value:function(t){for(var n=t,o=this.props,s=o.getItem,c=o.getItemCount,p=o.keyExtractor,l=o.sections,u=0;u=c(h)+1)n-=c(h)+1;else return-1===n?{section:f,key:S+':header',index:null,header:!0,trailingSection:l[u+1]}:n===c(h)?{section:f,key:S+':footer',index:null,header:!1,trailingSection:l[u+1]}:{section:f,key:S+':'+(f.keyExtractor||p)(s(h,n),n),index:n,leadingItem:s(h,n-1),leadingSection:l[u-1],trailingItem:s(h,n+1),trailingSection:l[u+1]}}}},{key:"_getSeparatorComponent",value:function(t,n){if(!(n=n||this._subExtractor(t)))return null;var o=n.section.ItemSeparatorComponent||this.props.ItemSeparatorComponent,s=this.props.SectionSeparatorComponent,c=t===this.state.childProps.getItemCount()-1,p=n.index===this.props.getItemCount(n.section.data)-1;return s&&p?s:!o||p||c?null:o}}]),c})(r(d[8]).PureComponent);c.defaultProps=n(n({},r(d[10]).defaultProps),{},{data:[]});var p=(function(t){r(d[3])(c,t);var s=o(c);function c(){var t;r(d[5])(this,c);for(var o=arguments.length,p=new 
Array(o),l=0;l0,s=R&&R.length>0;return!o&&s?R[0]:o?E[0]:t},o=r(d[1])({NOT_RESPONDER:null,RESPONDER_INACTIVE_PRESS_IN:null,RESPONDER_INACTIVE_PRESS_OUT:null,RESPONDER_ACTIVE_PRESS_IN:null,RESPONDER_ACTIVE_PRESS_OUT:null,RESPONDER_ACTIVE_LONG_PRESS_IN:null,RESPONDER_ACTIVE_LONG_PRESS_OUT:null,ERROR:null}),s={NOT_RESPONDER:!1,RESPONDER_INACTIVE_PRESS_IN:!1,RESPONDER_INACTIVE_PRESS_OUT:!1,RESPONDER_ACTIVE_PRESS_IN:!1,RESPONDER_ACTIVE_PRESS_OUT:!1,RESPONDER_ACTIVE_LONG_PRESS_IN:!1,RESPONDER_ACTIVE_LONG_PRESS_OUT:!1,ERROR:!1},n=E(E({},s),{},{RESPONDER_ACTIVE_PRESS_OUT:!0,RESPONDER_ACTIVE_PRESS_IN:!0}),_=E(E({},s),{},{RESPONDER_INACTIVE_PRESS_IN:!0,RESPONDER_ACTIVE_PRESS_IN:!0,RESPONDER_ACTIVE_LONG_PRESS_IN:!0}),S=E(E({},s),{},{RESPONDER_ACTIVE_LONG_PRESS_IN:!0}),l=r(d[1])({DELAY:null,RESPONDER_GRANT:null,RESPONDER_RELEASE:null,RESPONDER_TERMINATED:null,ENTER_PRESS_RECT:null,LEAVE_PRESS_RECT:null,LONG_PRESS_DETECTED:null}),h={NOT_RESPONDER:{DELAY:o.ERROR,RESPONDER_GRANT:o.RESPONDER_INACTIVE_PRESS_IN,RESPONDER_RELEASE:o.ERROR,RESPONDER_TERMINATED:o.ERROR,ENTER_PRESS_RECT:o.ERROR,LEAVE_PRESS_RECT:o.ERROR,LONG_PRESS_DETECTED:o.ERROR},RESPONDER_INACTIVE_PRESS_IN:{DELAY:o.RESPONDER_ACTIVE_PRESS_IN,RESPONDER_GRANT:o.ERROR,RESPONDER_RELEASE:o.NOT_RESPONDER,RESPONDER_TERMINATED:o.NOT_RESPONDER,ENTER_PRESS_RECT:o.RESPONDER_INACTIVE_PRESS_IN,LEAVE_PRESS_RECT:o.RESPONDER_INACTIVE_PRESS_OUT,LONG_PRESS_DETECTED:o.ERROR},RESPONDER_INACTIVE_PRESS_OUT:{DELAY:o.RESPONDER_ACTIVE_PRESS_OUT,RESPONDER_GRANT:o.ERROR,RESPONDER_RELEASE:o.NOT_RESPONDER,RESPONDER_TERMINATED:o.NOT_RESPONDER,ENTER_PRESS_RECT:o.RESPONDER_INACTIVE_PRESS_IN,LEAVE_PRESS_RECT:o.RESPONDER_INACTIVE_PRESS_OUT,LONG_PRESS_DETECTED:o.ERROR},RESPONDER_ACTIVE_PRESS_IN:{DELAY:o.ERROR,RESPONDER_GRANT:o.ERROR,RESPONDER_RELEASE:o.NOT_RESPONDER,RESPONDER_TERMINATED:o.NOT_RESPONDER,ENTER_PRESS_RECT:o.RESPONDER_ACTIVE_PRESS_IN,LEAVE_PRESS_RECT:o.RESPONDER_ACTIVE_PRESS_OUT,LONG_PRESS_DETECTED:o.RESPONDER_ACTIVE_LONG_PRESS_IN},RESPONDER_ACTIVE_PRESS_OUT:{DELAY:o.ERROR,RESPONDER_GRANT:o.ERROR,RESPONDER_RELEASE:o.NOT_RESPONDER,RESPONDER_TERMINATED:o.NOT_RESPONDER,ENTER_PRESS_RECT:o.RESPONDER_ACTIVE_PRESS_IN,LEAVE_PRESS_RECT:o.RESPONDER_ACTIVE_PRESS_OUT,LONG_PRESS_DETECTED:o.ERROR},RESPONDER_ACTIVE_LONG_PRESS_IN:{DELAY:o.ERROR,RESPONDER_GRANT:o.ERROR,RESPONDER_RELEASE:o.NOT_RESPONDER,RESPONDER_TERMINATED:o.NOT_RESPONDER,ENTER_PRESS_RECT:o.RESPONDER_ACTIVE_LONG_PRESS_IN,LEAVE_PRESS_RECT:o.RESPONDER_ACTIVE_LONG_PRESS_OUT,LONG_PRESS_DETECTED:o.RESPONDER_ACTIVE_LONG_PRESS_IN},RESPONDER_ACTIVE_LONG_PRESS_OUT:{DELAY:o.ERROR,RESPONDER_GRANT:o.ERROR,RESPONDER_RELEASE:o.NOT_RESPONDER,RESPONDER_TERMINATED:o.NOT_RESPONDER,ENTER_PRESS_RECT:o.RESPONDER_ACTIVE_LONG_PRESS_IN,LEAVE_PRESS_RECT:o.RESPONDER_ACTIVE_LONG_PRESS_OUT,LONG_PRESS_DETECTED:o.ERROR},error:{DELAY:o.NOT_RESPONDER,RESPONDER_GRANT:o.RESPONDER_INACTIVE_PRESS_IN,RESPONDER_RELEASE:o.NOT_RESPONDER,RESPONDER_TERMINATED:o.NOT_RESPONDER,ENTER_PRESS_RECT:o.NOT_RESPONDER,LEAVE_PRESS_RECT:o.NOT_RESPONDER,LONG_PRESS_DETECTED:o.NOT_RESPONDER}},u={componentDidMount:function(){r(d[2]).isTV&&(this._tvEventHandler=new(r(d[3])),this._tvEventHandler.enable(this,function(t,E){var R=r(d[4]).findNodeHandle(t);E.dispatchConfig={},R===E.tag&&('focus'===E.eventType?t.touchableHandleFocus(E):'blur'===E.eventType?t.touchableHandleBlur(E):'select'===E.eventType&&t.touchableHandlePress&&!t.props.disabled&&t.touchableHandlePress(E))}))},componentWillUnmount:function(){this._tvEventHandler&&(this._tvEventHandler.disable(),delete 
this._tvEventHandler),this.touchableDelayTimeout&&clearTimeout(this.touchableDelayTimeout),this.longPressDelayTimeout&&clearTimeout(this.longPressDelayTimeout),this.pressOutDelayTimeout&&clearTimeout(this.pressOutDelayTimeout)},touchableGetInitialState:function(){return{touchable:{touchState:void 0,responderID:null}}},touchableHandleResponderTerminationRequest:function(){return!this.props.rejectResponderTermination},touchableHandleStartShouldSetResponder:function(){return!this.props.disabled},touchableLongPressCancelsPress:function(){return!0},touchableHandleResponderGrant:function(t){var E=t.currentTarget;t.persist(),this.pressOutDelayTimeout&&clearTimeout(this.pressOutDelayTimeout),this.pressOutDelayTimeout=null,this.state.touchable.touchState=o.NOT_RESPONDER,this.state.touchable.responderID=E,this._receiveSignal(l.RESPONDER_GRANT,t);var R=void 0!==this.touchableGetHighlightDelayMS?Math.max(this.touchableGetHighlightDelayMS(),0):130;0!==(R=isNaN(R)?130:R)?this.touchableDelayTimeout=setTimeout(this._handleDelay.bind(this,t),R):this._handleDelay(t);var s=void 0!==this.touchableGetLongPressDelayMS?Math.max(this.touchableGetLongPressDelayMS(),10):370;s=isNaN(s)?370:s,this.longPressDelayTimeout=setTimeout(this._handleLongDelay.bind(this,t),s+R)},touchableHandleResponderRelease:function(t){this.pressInLocation=null,this._receiveSignal(l.RESPONDER_RELEASE,t)},touchableHandleResponderTerminate:function(t){this.pressInLocation=null,this._receiveSignal(l.RESPONDER_TERMINATED,t)},touchableHandleResponderMove:function(t){if(this.state.touchable.positionOnActivate){var E=this.state.touchable.positionOnActivate,s=this.state.touchable.dimensionsOnActivate,n=this.touchableGetPressRectOffset?this.touchableGetPressRectOffset():{left:20,right:20,top:20,bottom:20},_=n.left,S=n.top,h=n.right,u=n.bottom,O=this.touchableGetHitSlop?this.touchableGetHitSlop():null;O&&(_+=O.left||0,S+=O.top||0,h+=O.right||0,u+=O.bottom||0);var N=R(t.nativeEvent),P=N&&N.pageX,T=N&&N.pageY;if(this.pressInLocation)this._getDistanceBetweenPoints(P,T,this.pressInLocation.pageX,this.pressInLocation.pageY)>10&&this._cancelLongPressDelayTimeout();if(P>E.left-_&&T>E.top-S&&P>`");s!==n&&(this._performSideEffectsForTransition(s,n,t,E),this.state.touchable.touchState=n)}},_cancelLongPressDelayTimeout:function(){this.longPressDelayTimeout&&clearTimeout(this.longPressDelayTimeout),this.longPressDelayTimeout=null},_isHighlight:function(t){return t===o.RESPONDER_ACTIVE_PRESS_IN||t===o.RESPONDER_ACTIVE_LONG_PRESS_IN},_savePressInLocation:function(t){var E=R(t.nativeEvent),o=E&&E.pageX,s=E&&E.pageY,n=E&&E.locationX,_=E&&E.locationY;this.pressInLocation={pageX:o,pageY:s,locationX:n,locationY:_}},_getDistanceBetweenPoints:function(t,E,R,o){var s=t-R,n=E-o;return Math.sqrt(s*s+n*n)},_performSideEffectsForTransition:function(t,E,R,s){var h=this._isHighlight(t),u=this._isHighlight(E);(R===l.RESPONDER_TERMINATED||R===l.RESPONDER_RELEASE)&&this._cancelLongPressDelayTimeout();var O=t===o.NOT_RESPONDER&&E===o.RESPONDER_INACTIVE_PRESS_IN,N=!n[t]&&n[E];if((O||N)&&this._remeasureMetricsOnActivation(),_[t]&&R===l.LONG_PRESS_DETECTED&&this.touchableHandleLongPress&&this.touchableHandleLongPress(s),u&&!h?this._startHighlight(s):!u&&h&&this._endHighlight(s),_[t]&&R===l.RESPONDER_RELEASE){var 
P=!!this.props.onLongPress,T=S[t]&&(!P||!this.touchableLongPressCancelsPress());(!S[t]||T)&&this.touchableHandlePress&&(u||h||(this._startHighlight(s),this._endHighlight(s)),this.touchableHandlePress(s))}this.touchableDelayTimeout&&clearTimeout(this.touchableDelayTimeout),this.touchableDelayTimeout=null},_startHighlight:function(t){this._savePressInLocation(t),this.touchableHandleActivePressIn&&this.touchableHandleActivePressIn(t)},_endHighlight:function(t){var E=this;this.touchableHandleActivePressOut&&(this.touchableGetPressOutDelayMS&&this.touchableGetPressOutDelayMS()?this.pressOutDelayTimeout=setTimeout(function(){E.touchableHandleActivePressOut(t)},this.touchableGetPressOutDelayMS()):this.touchableHandleActivePressOut(t))},withoutDefaultFocusAndBlur:{}},O=(u.touchableHandleFocus,u.touchableHandleBlur,r(d[8])(u,["touchableHandleFocus","touchableHandleBlur"]));u.withoutDefaultFocusAndBlur=O;var N={Mixin:u,TOUCH_TARGET_DEBUG:!1,renderDebugView:function(t){t.color,t.hitSlop;if(!N.TOUCH_TARGET_DEBUG)return null;throw Error('Touchable.TOUCH_TARGET_DEBUG should not be enabled in prod!')}};r(d[12]).create({debug:{position:'absolute',borderWidth:1,borderStyle:'dashed'}});m.exports=N},282,[19,210,82,199,131,87,283,285,126,106,14,128,191]); -__d(function(g,r,i,a,m,e,d){'use strict';function t(t,o){this.left=t,this.top=o}t.prototype.destructor=function(){this.left=null,this.top=null},r(d[0]).addPoolingTo(t,r(d[0]).twoArgumentPooler),m.exports=t},283,[284]); -__d(function(g,r,i,a,m,e,d){'use strict';var t=function(t){if(this.instancePool.length){var n=this.instancePool.pop();return this.call(n,t),n}return new this(t)},n=function(t){r(d[0])(t instanceof this,'Trying to release an instance into a pool of a different type.'),t.destructor(),this.instancePool.length10?o:10,update:{duration:o>10?o:10,type:r(d[4]).Types[s]||'keyboard'}}),n.setState({bottom:c}))}else n.setState({bottom:0})},n._onLayout=function(t){n._frame=t.nativeEvent.layout,n._initialFrameHeight||(n._initialFrameHeight=n._frame.height)},n.state={bottom:0},n.viewRef=r(d[5]).createRef(),n}return r(d[6])(c,[{key:"_relativeKeyboardHeight",value:function(t){var n=this._frame;if(!n||!t)return 0;var o=t.screenY-this.props.keyboardVerticalOffset;return Math.max(n.y+n.height-o,0)}},{key:"componentDidMount",value:function(){this._subscriptions=[r(d[7]).addListener('keyboardWillChangeFrame',this._onKeyboardChange)]}},{key:"componentWillUnmount",value:function(){this._subscriptions.forEach(function(t){t.remove()})}},{key:"render",value:function(){var t=this.props,n=t.behavior,o=t.children,s=t.contentContainerStyle,u=t.enabled,c=(t.keyboardVerticalOffset,t.style),l=r(d[8])(t,["behavior","children","contentContainerStyle","enabled","keyboardVerticalOffset","style"]),f=u?this.state.bottom:0;switch(n){case'height':var h;return null!=this._frame&&this.state.bottom>0&&(h={height:this._initialFrameHeight-f,flex:0}),r(d[5]).createElement(r(d[9]),r(d[10])({ref:this.viewRef,style:r(d[11]).compose(c,h),onLayout:this._onLayout},l),o);case'position':return r(d[5]).createElement(r(d[9]),r(d[10])({ref:this.viewRef,style:c,onLayout:this._onLayout},l),r(d[5]).createElement(r(d[9]),{style:r(d[11]).compose(s,{bottom:f})},o));case'padding':return r(d[5]).createElement(r(d[9]),r(d[10])({ref:this.viewRef,style:r(d[11]).compose(c,{paddingBottom:f}),onLayout:this._onLayout},l),o);default:return 
r(d[5]).createElement(r(d[9]),r(d[10])({ref:this.viewRef,onLayout:this._onLayout,style:c},l),o)}}}]),c})(r(d[5]).Component);n.defaultProps={enabled:!0,keyboardVerticalOffset:0},m.exports=n},299,[12,9,7,5,254,14,6,251,126,128,17,191]); -__d(function(g,r,i,a,m,e,d){var t=r(d[0])(r(d[1])),n=r(d[0])(r(d[2])),l=r(d[0])(r(d[3])),c=r(d[0])(r(d[4])),s=r(d[0])(r(d[5])),o=r(d[0])(r(d[6])),u=r(d[0])(r(d[7]));function f(){if("undefined"==typeof Reflect||!Reflect.construct)return!1;if(Reflect.construct.sham)return!1;if("function"==typeof Proxy)return!0;try{return Date.prototype.toString.call(Reflect.construct(Date,[],function(){})),!0}catch(t){return!1}}var p=(function(p){(0,c.default)(y,p);var h,v,k=(h=y,v=f(),function(){var t,n=(0,o.default)(h);if(v){var l=(0,o.default)(this).constructor;t=Reflect.construct(n,arguments,l)}else t=n.apply(this,arguments);return(0,s.default)(this,t)});function y(){var t;(0,n.default)(this,y);for(var l=arguments.length,c=new Array(l),s=0;s0&&this._nativeSwitchRef&&this._nativeSwitchRef.setNativeProps&&h.Commands.setValue(this._nativeSwitchRef,t.value)}}]),_})(r(d[12]).Component),p=function(){return!1},C=function(){return!0};m.exports=v},319,[1,17,126,5,6,7,9,12,13,320,321,191,14]); -__d(function(g,r,i,a,m,e,d){'use strict';Object.defineProperty(e,"__esModule",{value:!0}),e.default=e.Commands=void 0;r(d[0])(r(d[1]));var t=r(d[2])(r(d[3])),u=r(d[2])(r(d[4])),n=(0,t.default)({supportedCommands:['setNativeValue']});e.Commands=n;var o=(0,u.default)('AndroidSwitch',{interfaceOnly:!0});e.default=o},320,[13,14,1,130,57]); -__d(function(g,r,i,a,m,e,d){'use strict';Object.defineProperty(e,"__esModule",{value:!0}),e.default=e.Commands=void 0;r(d[0])(r(d[1]));var t=r(d[2])(r(d[3])),o=(0,r(d[2])(r(d[4])).default)({supportedCommands:['setValue']});e.Commands=o;var u=(0,t.default)('Switch',{paperComponentName:'RCTSwitch',excludedPlatform:'android'});e.default=u},321,[13,14,1,57,130]); -__d(function(g,r,i,a,m,e,d){'use strict';var t=r(d[0])(r(d[1])),n=r(d[0])(r(d[2])),l=r(d[0])(r(d[3])),u=r(d[0])(r(d[4])),o=r(d[0])(r(d[5])),c=r(d[0])(r(d[6])),s=(r(d[0])(r(d[7])),r(d[0])(r(d[8])));function f(){if("undefined"==typeof Reflect||!Reflect.construct)return!1;if(Reflect.construct.sham)return!1;if("function"==typeof Proxy)return!0;try{return Date.prototype.toString.call(Reflect.construct(Date,[],function(){})),!0}catch(t){return!1}}function p(t){return{backgroundColor:null!=t.backgroundColor?{value:t.backgroundColor,animated:t.animated}:null,barStyle:null!=t.barStyle?{value:t.barStyle,animated:t.animated}:null,translucent:t.translucent,hidden:null!=t.hidden?{value:t.hidden,animated:t.animated,transition:t.showHideTransition}:null,networkActivityIndicatorVisible:t.networkActivityIndicatorVisible}}var y=(function(c){(0,l.default)(h,c);var y,k,v=(y=h,k=f(),function(){var t,n=(0,o.default)(y);if(k){var l=(0,o.default)(this).constructor;t=Reflect.construct(n,arguments,l)}else t=n.apply(this,arguments);return(0,u.default)(this,t)});function h(){var n;(0,t.default)(this,h);for(var l=arguments.length,u=new Array(l),o=0;o0&&f.current&&f.current.setNativeProps(n)},[f,t.value,S,v,R]),l(t.autoFocus,f),r(d[2]).useEffect(function(){var t=r(d[4]).findNodeHandle(f.current);if(null!=t)return r(d[5]).registerInput(t),function(){r(d[5]).unregisterInput(t)}},[f]),r(d[2]).useEffect(function(){return function(){F()&&r(d[6])(f.current).blur()}},[f]);var I,P=r(d[7])({getForwardedRef:function(){return 
t.forwardedRef},setLocalRef:function(t){f.current=t,t&&(t.clear=j,t.isFocused=F,t.getNativeRef=E)}}),w=n({},null),x=t.multiline?o:u,D=t.multiline?[p.multilineInput,t.style]:t.style;return w.rejectResponderTermination=t.rejectResponderTermination,I=r(d[2]).createElement(x,r(d[8])({ref:P},t,{dataDetectorTypes:t.dataDetectorTypes,onBlur:function(n){r(d[5]).blurField(r(d[4]).findNodeHandle(f.current)),t.onBlur&&t.onBlur(n)},onChange:function(n){f.current&&f.current.setNativeProps({mostRecentEventCount:n.nativeEvent.eventCount});var o=n.nativeEvent.text;t.onChange&&t.onChange(n),t.onChangeText&&t.onChangeText(o),f.current&&C(o)},onContentSizeChange:t.onContentSizeChange,onFocus:function(n){r(d[5]).focusField(r(d[4]).findNodeHandle(f.current)),t.onFocus&&t.onFocus(n)},onScroll:function(n){t.onScroll&&t.onScroll(n)},onSelectionChange:function(n){t.onSelectionChange&&t.onSelectionChange(n),f.current&&T(n.nativeEvent.selection)},onSelectionChangeShouldSetResponder:c,selection:v,style:D,text:'string'==typeof t.value?t.value:'string'==typeof t.defaultValue?t.defaultValue:''})),r(d[2]).createElement(r(d[9]).Provider,{value:!0},r(d[2]).createElement(r(d[10]),r(d[8])({onLayout:t.onLayout,onPress:function(n){(t.editable||void 0===t.editable)&&r(d[6])(f.current).focus()},accessible:t.accessible,accessibilityLabel:t.accessibilityLabel,accessibilityRole:t.accessibilityRole,accessibilityState:t.accessibilityState,nativeID:t.nativeID,testID:t.testID},w),I))}var f=r(d[2]).forwardRef(function(t,n){return r(d[2]).createElement(s,r(d[8])({},t,{forwardedRef:n}))});f.defaultProps={allowFontScaling:!0,rejectResponderTermination:!0,underlineColorAndroid:'transparent'},f.propTypes=r(d[11]),f.State={currentlyFocusedField:r(d[5]).currentlyFocusedField,focusTextInput:r(d[5]).focusTextInput,blurTextInput:r(d[5]).blurTextInput};var p=r(d[12]).create({multilineInput:{paddingTop:5}});m.exports=f},325,[19,58,14,27,131,86,287,231,17,286,326,327,191]); -__d(function(g,r,i,a,m,e,d){'use strict';var t=r(d[0])(r(d[1])),n=r(d[0])(r(d[2])),o=r(d[0])(r(d[3])),s=r(d[0])(r(d[4])),u=r(d[0])(r(d[5])),l=r(d[0])(r(d[6])),c=r(d[0])(r(d[7])),p=r(d[0])(r(d[8])),f=r(d[0])(r(d[9])),y=r(d[0])(r(d[10])),b=(r(d[0])(r(d[11])),r(d[12])(r(d[13])));function h(t,n){var o;if("undefined"==typeof Symbol||null==t[Symbol.iterator]){if(Array.isArray(t)||(o=v(t))||n&&t&&"number"==typeof t.length){o&&(t=o);var s=0;return function(){return s>=t.length?{done:!0}:{done:!1,value:t[s++]}}}throw new TypeError("Invalid attempt to iterate non-iterable instance.\nIn order to be iterable, non-array objects must have a [Symbol.iterator]() method.")}return(o=t[Symbol.iterator]()).next.bind(o)}function v(t,n){if(t){if("string"==typeof t)return P(t,n);var o=Object.prototype.toString.call(t).slice(8,-1);return"Object"===o&&t.constructor&&(o=t.constructor.name),"Map"===o||"Set"===o?Array.from(t):"Arguments"===o||/^(?:Ui|I)nt(?:8|16|32)(?:Clamped)?Array$/.test(o)?P(t,n):void 0}}function P(t,n){(null==n||n>t.length)&&(n=t.length);for(var o=0,s=new Array(n);o=23};var D='android'===b.default.OS?function(t,n){return n&&F.canUseNativeForeground()?{nativeForegroundAndroid:t}:{nativeBackgroundAndroid:t}}:function(t,n){return null};m.exports=F},331,[1,19,126,5,6,7,9,12,194,198,131,82,128,114,13,14,129]); -__d(function(g,r,i,a,m,e,d){'use strict';var t=r(d[0])(r(d[1])),o=r(d[0])(r(d[2])),n=r(d[0])(r(d[3]));function c(t,o){var n=Object.keys(t);if(Object.getOwnPropertySymbols){var c=Object.getOwnPropertySymbols(t);o&&(c=c.filter(function(o){return 
Object.getOwnPropertyDescriptor(t,o).enumerable})),n.push.apply(n,c)}return n}function s(o){for(var n=1;nthis.eventPool.length&&this.eventPool.push(e)}function Y(e){e.eventPool=[],e.getPooled=j,e.release=Q}t(a[1])(B.prototype,{preventDefault:function(){this.defaultPrevented=!0;var e=this.nativeEvent;e&&(e.preventDefault?e.preventDefault():"unknown"!=typeof e.returnValue&&(e.returnValue=!1),this.isDefaultPrevented=W)},stopPropagation:function(){var e=this.nativeEvent;e&&(e.stopPropagation?e.stopPropagation():"unknown"!=typeof e.cancelBubble&&(e.cancelBubble=!0),this.isPropagationStopped=W)},persist:function(){this.isPersistent=W},isPersistent:H,destructor:function(){var e,t=this.constructor.Interface;for(e in t)this[e]=null;this.nativeEvent=this._targetInst=this.dispatchConfig=null,this.isPropagationStopped=this.isDefaultPrevented=H,this._dispatchInstances=this._dispatchListeners=null}}),B.Interface={type:null,target:null,currentTarget:function(){return null},eventPhase:null,bubbles:null,cancelable:null,timeStamp:function(e){return e.timeStamp||Date.now()},defaultPrevented:null,isTrusted:null},B.extend=function(e){function n(){}function r(){return i.apply(this,arguments)}var i=this;n.prototype=i.prototype;var l=new n;return t(a[1])(l,r.prototype),r.prototype=l,r.prototype.constructor=r,r.Interface=t(a[1])({},i.Interface,e),r.extend=i.extend,Y(r),r},Y(B);var V=B.extend({touchHistory:function(){return null}});function X(e){return"topTouchStart"===e}function q(e){return"topTouchMove"===e}var $=["topTouchStart"],K=["topTouchMove"],G=["topTouchCancel","topTouchEnd"],J=[],Z={touchBank:J,numberActiveTouches:0,indexOfSingleActiveTouch:-1,mostRecentTimeStamp:0};function ee(e){return e.timeStamp||e.timestamp}function te(e){if(null==(e=e.identifier))throw Error("Touch object is missing identifier.");return e}function ne(e){var t=te(e),n=J[t];n?(n.touchActive=!0,n.startPageX=e.pageX,n.startPageY=e.pageY,n.startTimeStamp=ee(e),n.currentPageX=e.pageX,n.currentPageY=e.pageY,n.currentTimeStamp=ee(e),n.previousPageX=e.pageX,n.previousPageY=e.pageY,n.previousTimeStamp=ee(e)):(n={touchActive:!0,startPageX:e.pageX,startPageY:e.pageY,startTimeStamp:ee(e),currentPageX:e.pageX,currentPageY:e.pageY,currentTimeStamp:ee(e),previousPageX:e.pageX,previousPageY:e.pageY,previousTimeStamp:ee(e)},J[t]=n),Z.mostRecentTimeStamp=ee(e)}function re(e){var t=J[te(e)];t?(t.touchActive=!0,t.previousPageX=t.currentPageX,t.previousPageY=t.currentPageY,t.previousTimeStamp=t.currentTimeStamp,t.currentPageX=e.pageX,t.currentPageY=e.pageY,t.currentTimeStamp=ee(e),Z.mostRecentTimeStamp=ee(e)):console.warn("Cannot record touch move without a touch start.\nTouch Move: %s\n","Touch Bank: %s",le(e),ae())}function ie(e){var t=J[te(e)];t?(t.touchActive=!1,t.previousPageX=t.currentPageX,t.previousPageY=t.currentPageY,t.previousTimeStamp=t.currentTimeStamp,t.currentPageX=e.pageX,t.currentPageY=e.pageY,t.currentTimeStamp=ee(e),Z.mostRecentTimeStamp=ee(e)):console.warn("Cannot record touch end without a touch start.\nTouch End: %s\n","Touch Bank: %s",le(e),ae())}function le(e){return JSON.stringify({identifier:e.identifier,pageX:e.pageX,pageY:e.pageY,timestamp:ee(e)})}function ae(){var e=JSON.stringify(J.slice(0,20));return 20 component.");return n=Tt,Tt+=2,{node:at(n,"RCTRawText",t,{text:e},r)}}var xt=setTimeout,_t=clearTimeout;function kt(e){var t=e.node,n=$e(null,We,{style:{display:"none"}},e.canonical.viewConfig.validAttributes);return{node:st(t,n),canonical:e.canonical}}var wt=/^(.*)[\\\/]/;function Pt(e){var t="";do{e:switch(e.tag){case 
3:case 4:case 6:case 7:case 10:case 9:var n="";break e;default:var r=e._debugOwner,i=e._debugSource,l=Ae(e.type);n=null,r&&(n=Ae(r.type)),r=l,l="",i?l=" (at "+i.fileName.replace(wt,"")+":"+i.lineNumber+")":n&&(l=" (created by "+n+")"),n="\n in "+(r||"Unknown")+l}t+=n,e=e.return}while(e);return t}new Set;var Rt=[],Ct=-1;function Nt(e){0>Ct||(e.current=Rt[Ct],Rt[Ct]=null,Ct--)}function It(e,t){Rt[++Ct]=e.current,e.current=t}var Ut={},zt={current:Ut},At={current:!1},Mt=Ut;function Dt(e,t){var n=e.type.contextTypes;if(!n)return Ut;var r=e.stateNode;if(r&&r.__reactInternalMemoizedUnmaskedChildContext===t)return r.__reactInternalMemoizedMaskedChildContext;var i,l={};for(i in n)l[i]=t[i];return r&&((e=e.stateNode).__reactInternalMemoizedUnmaskedChildContext=t,e.__reactInternalMemoizedMaskedChildContext=l),l}function Ot(e){return null!==(e=e.childContextTypes)&&void 0!==e}function Lt(e){Nt(At),Nt(zt)}function Ft(e){Nt(At),Nt(zt)}function Wt(e,t,n){if(zt.current!==Ut)throw Error("Unexpected context found on stack. This error is likely caused by a bug in React. Please file an issue.");It(zt,t),It(At,n)}function Ht(e,n,r){var i=e.stateNode;if(e=n.childContextTypes,"function"!=typeof i.getChildContext)return r;for(var l in i=i.getChildContext())if(!(l in e))throw Error((Ae(n)||"Unknown")+'.getChildContext(): key "'+l+'" is not defined in childContextTypes.');return t(a[1])({},r,{},i)}function Bt(e){var t=e.stateNode;return t=t&&t.__reactInternalMemoizedMergedChildContext||Ut,Mt=zt.current,It(zt,t),It(At,At.current),!0}function jt(e,t,n){var r=e.stateNode;if(!r)throw Error("Expected to have an instance by this point. This error is likely caused by a bug in React. Please file an issue.");n?(t=Ht(e,t,Mt),r.__reactInternalMemoizedMergedChildContext=t,Nt(At),Nt(zt),It(zt,t)):Nt(At),It(At,n)}var Qt={},Yt=void 0!==t(a[4]).unstable_requestPaint?t(a[4]).unstable_requestPaint:function(){},Vt=null,Xt=null,qt=!1,$t=t(a[4]).unstable_now(),Kt=1e4>$t?t(a[4]).unstable_now:function(){return t(a[4]).unstable_now()-$t};function Gt(){switch(t(a[4]).unstable_getCurrentPriorityLevel()){case t(a[4]).unstable_ImmediatePriority:return 99;case t(a[4]).unstable_UserBlockingPriority:return 98;case t(a[4]).unstable_NormalPriority:return 97;case t(a[4]).unstable_LowPriority:return 96;case t(a[4]).unstable_IdlePriority:return 95;default:throw Error("Unknown priority level.")}}function Jt(e){switch(e){case 99:return t(a[4]).unstable_ImmediatePriority;case 98:return t(a[4]).unstable_UserBlockingPriority;case 97:return t(a[4]).unstable_NormalPriority;case 96:return t(a[4]).unstable_LowPriority;case 95:return t(a[4]).unstable_IdlePriority;default:throw Error("Unknown priority level.")}}function Zt(e,n){return e=Jt(e),t(a[4]).unstable_runWithPriority(e,n)}function en(e,n,r){return e=Jt(e),t(a[4]).unstable_scheduleCallback(e,n,r)}function tn(e){return null===Vt?(Vt=[e],Xt=t(a[4]).unstable_scheduleCallback(t(a[4]).unstable_ImmediatePriority,rn)):Vt.push(e),Qt}function nn(){if(null!==Xt){var e=Xt;Xt=null,t(a[4]).unstable_cancelCallback(e)}rn()}function rn(){if(!qt&&null!==Vt){qt=!0;var e=0;try{var n=Vt;Zt(99,function(){for(;e=t&&(Xr=!0),e.firstContext=null)}function yn(e,t){if(dn!==e&&!1!==t&&0!==t)if("number"==typeof t&&1073741823!==t||(dn=e,t=1073741823),t={context:e,observedBits:t,next:null},null===fn){if(null===sn)throw Error("Context can only be read while React is rendering. In classes, you can read it in the render method or getDerivedStateFromProps. 
In function components, you can read it directly in the function body, but not inside Hooks like useReducer() or useMemo().");fn=t,sn.dependencies={expirationTime:0,firstContext:t,responders:null}}else fn=fn.next=t;return e._currentValue2}var bn=!1;function Tn(e){return{baseState:e,firstUpdate:null,lastUpdate:null,firstCapturedUpdate:null,lastCapturedUpdate:null,firstEffect:null,lastEffect:null,firstCapturedEffect:null,lastCapturedEffect:null}}function En(e){return{baseState:e.baseState,firstUpdate:e.firstUpdate,lastUpdate:e.lastUpdate,firstCapturedUpdate:null,lastCapturedUpdate:null,firstEffect:null,lastEffect:null,firstCapturedEffect:null,lastCapturedEffect:null}}function Sn(e,t){return{expirationTime:e,suspenseConfig:t,tag:0,payload:null,callback:null,next:null,nextEffect:null}}function xn(e,t){null===e.lastUpdate?e.firstUpdate=e.lastUpdate=t:(e.lastUpdate.next=t,e.lastUpdate=t)}function _n(e,t){var n=e.alternate;if(null===n){var r=e.updateQueue,i=null;null===r&&(r=e.updateQueue=Tn(e.memoizedState))}else r=e.updateQueue,i=n.updateQueue,null===r?null===i?(r=e.updateQueue=Tn(e.memoizedState),i=n.updateQueue=Tn(n.memoizedState)):r=e.updateQueue=En(i):null===i&&(i=n.updateQueue=En(r));null===i||r===i?xn(r,t):null===r.lastUpdate||null===i.lastUpdate?(xn(r,t),xn(i,t)):(xn(r,t),i.lastUpdate=t)}function kn(e,t){var n=e.updateQueue;null===(n=null===n?e.updateQueue=Tn(e.memoizedState):wn(e,n)).lastCapturedUpdate?n.firstCapturedUpdate=n.lastCapturedUpdate=t:(n.lastCapturedUpdate.next=t,n.lastCapturedUpdate=t)}function wn(e,t){var n=e.alternate;return null!==n&&t===n.updateQueue&&(t=e.updateQueue=En(t)),t}function Pn(e,n,r,i,l,o){switch(r.tag){case 1:return"function"==typeof(e=r.payload)?e.call(o,i,l):e;case 3:e.effectTag=-4097&e.effectTag|64;case 0:if(null===(l="function"==typeof(e=r.payload)?e.call(o,i,l):e)||void 0===l)break;return t(a[1])({},i,l);case 2:bn=!0}return i}function Rn(e,t,n,r,i){bn=!1;for(var l=(t=wn(e,t)).baseState,a=null,o=0,u=t.firstUpdate,c=l;null!==u;){var s=u.expirationTime;sm?(g=h,h=null):g=h.sibling;var v=d(i,h,o[m],u);if(null===v){null===h&&(h=g);break}e&&h&&null===v.alternate&&t(i,h),a=l(v,a,m),null===s?c=v:s.sibling=v,s=v,h=g}if(m===o.length)return n(i,h),c;if(null===h){for(;mm?(g=h,h=null):g=h.sibling;var y=d(i,h,v.value,u);if(null===y){null===h&&(h=g);break}e&&h&&null===y.alternate&&t(i,h),a=l(y,a,m),null===s?c=y:s.sibling=y,s=y,h=g}if(v.done)return n(i,h),c;if(null===h){for(;!v.done;m++,v=o.next())null!==(v=f(i,v.value,u))&&(a=l(v,a,m),null===s?c=v:s.sibling=v,s=v);return c}for(h=r(i,h);!v.done;m++,v=o.next())null!==(v=p(h,i,m,v.value,u))&&(e&&null!==v.alternate&&h.delete(null===v.key?m:v.key),a=l(v,a,m),null===s?c=v:s.sibling=v,s=v);return e&&h.forEach(function(e){return t(i,e)}),c}return function(e,r,l,o){var u="object"==typeof l&&null!==l&&l.type===Te&&null===l.key;u&&(l=l.props.children);var c="object"==typeof l&&null!==l;if(c)switch(l.$$typeof){case ye:e:{for(c=l.key,u=r;null!==u;){if(u.key===c){if(7===u.tag?l.type===Te:u.elementType===l.type){n(e,u.sibling),(r=i(u,l.type===Te?l.props.children:l.props)).ref=Wn(e,u,l),r.return=e,e=r;break e}n(e,u);break}t(e,u),u=u.sibling}l.type===Te?((r=Gl(l.props.children,e.mode,o,l.key)).return=e,e=r):((o=Kl(l.type,l.key,l.props,null,e.mode,o)).ref=Wn(e,r,l),o.return=e,e=o)}return a(e);case be:e:{for(u=l.key;null!==r;){if(r.key===u){if(4===r.tag&&r.stateNode.containerInfo===l.containerInfo&&r.stateNode.implementation===l.implementation){n(e,r.sibling),(r=i(r,l.children||[])).return=e,e=r;break 
e}n(e,r);break}t(e,r),r=r.sibling}(r=Zl(l,e.mode,o)).return=e,e=r}return a(e)}if("string"==typeof l||"number"==typeof l)return l=""+l,null!==r&&6===r.tag?(n(e,r.sibling),(r=i(r,l)).return=e,e=r):(n(e,r),(r=Jl(l,e.mode,o)).return=e,e=r),a(e);if(Fn(l))return h(e,r,l,o);if(Ue(l))return m(e,r,l,o);if(c&&Hn(e,l),void 0===l&&!u)switch(e.tag){case 1:case 0:throw e=e.type,Error((e.displayName||e.name||"Component")+"(...): Nothing was returned from render. This usually means a return statement is missing. Or, to render nothing, return null.")}return n(e,r)}}var jn=Bn(!0),Qn=Bn(!1),Yn={},Vn={current:Yn},Xn={current:Yn},qn={current:Yn};function $n(e){if(e===Yn)throw Error("Expected host context to exist. This error is likely caused by a bug in React. Please file an issue.");return e}function Kn(e,t){It(qn,t),It(Xn,e),It(Vn,Yn),Nt(Vn),It(Vn,{isInAParentText:!1})}function Gn(e){Nt(Vn),Nt(Xn),Nt(qn)}function Jn(e){$n(qn.current);var t=$n(Vn.current),n=e.type;n="AndroidTextInput"===n||"RCTMultilineTextInputView"===n||"RCTSinglelineTextInputView"===n||"RCTText"===n||"RCTVirtualText"===n,t!==(n=t.isInAParentText!==n?{isInAParentText:n}:t)&&(It(Xn,e),It(Vn,n))}function Zn(e){Xn.current===e&&(Nt(Vn),Nt(Xn))}var er={current:0};function tr(e){for(var t=e;null!==t;){if(13===t.tag){var n=t.memoizedState;if(null!==n&&(null===(n=n.dehydrated)||it()||it()))return t}else if(19===t.tag&&void 0!==t.memoizedProps.revealOrder){if(0!=(64&t.effectTag))return t}else if(null!==t.child){t.child.return=t,t=t.child;continue}if(t===e)break;for(;null===t.sibling;){if(null===t.return||t.return===e)return null;t=t.return}t.sibling.return=t.return,t=t.sibling}return null}function nr(e,t){return{responder:e,props:t}}var rr=t(a[3]).__SECRET_INTERNALS_DO_NOT_USE_OR_YOU_WILL_BE_FIRED.ReactCurrentDispatcher,ir=t(a[3]).__SECRET_INTERNALS_DO_NOT_USE_OR_YOU_WILL_BE_FIRED.ReactCurrentBatchConfig,lr=0,ar=null,or=null,ur=null,cr=null,sr=null,fr=null,dr=0,pr=null,hr=0,mr=!1,gr=null,vr=0;function yr(){throw Error("Invalid hook call. Hooks can only be called inside of the body of a function component. This could happen for one of the following reasons:\n1. You might have mismatching versions of React and the renderer (such as React DOM)\n2. You might be breaking the Rules of Hooks\n3. 
You might have more than one copy of React in the same app\nSee https://fb.me/react-invalid-hook-call for tips about how to debug and fix this problem.")}function br(e,t){if(null===t)return!1;for(var n=0;ndr&&Pl(dr=s)):(wl(s,u.suspenseConfig),l=u.eagerReducer===e?u.eagerState:e(l,u.action)),a=u,u=u.next}while(null!==u&&u!==r);c||(o=a,i=l),ln(l,t.memoizedState)||(Xr=!0),t.memoizedState=l,t.baseUpdate=o,t.baseState=i,n.lastRenderedState=l}return[t.memoizedState,n.dispatch]}function wr(e){var t=Sr();return"function"==typeof e&&(e=e()),t.memoizedState=t.baseState=e,e=(e=t.queue={last:null,dispatch:null,lastRenderedReducer:_r,lastRenderedState:e}).dispatch=Or.bind(null,ar,e),[t.memoizedState,e]}function Pr(e){return kr(_r)}function Rr(e,t,n,r){return e={tag:e,create:t,destroy:n,deps:r,next:null},null===pr?(pr={lastEffect:null}).lastEffect=e.next=e:null===(t=pr.lastEffect)?pr.lastEffect=e.next=e:(n=t.next,t.next=e,e.next=n,pr.lastEffect=e),e}function Cr(e,t,n,r){var i=Sr();hr|=e,i.memoizedState=Rr(t,n,void 0,void 0===r?null:r)}function Nr(e,t,n,r){var i=xr();r=void 0===r?null:r;var l=void 0;if(null!==or){var a=or.memoizedState;if(l=a.destroy,null!==r&&br(r,a.deps))return void Rr(0,n,l,r)}hr|=e,i.memoizedState=Rr(t,n,l,r)}function Ir(e,t){return Cr(516,192,e,t)}function Ur(e,t){return Nr(516,192,e,t)}function zr(e,t){return"function"==typeof t?(e=e(),t(e),function(){t(null)}):null!==t&&void 0!==t?(e=e(),t.current=e,function(){t.current=null}):void 0}function Ar(){}function Mr(e,t){return Sr().memoizedState=[e,void 0===t?null:t],e}function Dr(e,t){var n=xr();t=void 0===t?null:t;var r=n.memoizedState;return null!==r&&null!==t&&br(t,r[1])?r[0]:(n.memoizedState=[e,t],e)}function Or(e,t,n){if(!(25>vr))throw Error("Too many re-renders. React limits the number of renders to prevent an infinite loop.");var r=e.alternate;if(e===ar||null!==r&&r===ar)if(mr=!0,e={expirationTime:lr,suspenseConfig:null,action:n,eagerReducer:null,eagerState:null,next:null},null===gr&&(gr=new Map),void 0===(n=gr.get(t)))gr.set(t,e);else{for(t=n;null!==t.next;)t=t.next;t.next=e}else{var i=hl(),l=In.suspense;l={expirationTime:i=ml(i,e,l),suspenseConfig:l,action:n,eagerReducer:null,eagerState:null,next:null};var a=t.last;if(null===a)l.next=l;else{var o=a.next;null!==o&&(l.next=o),a.next=l}if(t.last=l,0===e.expirationTime&&(null===r||0===r.expirationTime)&&null!==(r=t.lastRenderedReducer))try{var u=t.lastRenderedState,c=r(u,n);if(l.eagerReducer=r,l.eagerState=c,ln(c,u))return}catch(e){}gl(e,i)}}var Lr={readContext:yn,useCallback:yr,useContext:yr,useEffect:yr,useImperativeHandle:yr,useLayoutEffect:yr,useMemo:yr,useReducer:yr,useRef:yr,useState:yr,useDebugValue:yr,useResponder:yr,useDeferredValue:yr,useTransition:yr},Fr={readContext:yn,useCallback:Mr,useContext:yn,useEffect:Ir,useImperativeHandle:function(e,t,n){return n=null!==n&&void 0!==n?n.concat([e]):null,Cr(4,36,zr.bind(null,t,e),n)},useLayoutEffect:function(e,t){return Cr(4,36,e,t)},useMemo:function(e,t){var n=Sr();return t=void 0===t?null:t,e=e(),n.memoizedState=[e,t],e},useReducer:function(e,t,n){var r=Sr();return t=void 0!==n?n(t):t,r.memoizedState=r.baseState=t,e=(e=r.queue={last:null,dispatch:null,lastRenderedReducer:e,lastRenderedState:t}).dispatch=Or.bind(null,ar,e),[r.memoizedState,e]},useRef:function(e){return e={current:e},Sr().memoizedState=e},useState:wr,useDebugValue:Ar,useResponder:nr,useDeferredValue:function(e,n){var r=wr(e),i=r[0],l=r[1];return Ir(function(){t(a[4]).unstable_next(function(){var t=ir.suspense;ir.suspense=void 
0===n?null:n;try{l(e)}finally{ir.suspense=t}})},[e,n]),i},useTransition:function(e){var n=wr(!1),r=n[0],i=n[1];return[Mr(function(n){i(!0),t(a[4]).unstable_next(function(){var t=ir.suspense;ir.suspense=void 0===e?null:e;try{i(!1),n()}finally{ir.suspense=t}})},[e,r]),r]}},Wr={readContext:yn,useCallback:Dr,useContext:yn,useEffect:Ur,useImperativeHandle:function(e,t,n){return n=null!==n&&void 0!==n?n.concat([e]):null,Nr(4,36,zr.bind(null,t,e),n)},useLayoutEffect:function(e,t){return Nr(4,36,e,t)},useMemo:function(e,t){var n=xr();t=void 0===t?null:t;var r=n.memoizedState;return null!==r&&null!==t&&br(t,r[1])?r[0]:(e=e(),n.memoizedState=[e,t],e)},useReducer:kr,useRef:function(){return xr().memoizedState},useState:Pr,useDebugValue:Ar,useResponder:nr,useDeferredValue:function(e,n){var r=Pr(),i=r[0],l=r[1];return Ur(function(){t(a[4]).unstable_next(function(){var t=ir.suspense;ir.suspense=void 0===n?null:n;try{l(e)}finally{ir.suspense=t}})},[e,n]),i},useTransition:function(e){var n=Pr(),r=n[0],i=n[1];return[Dr(function(n){i(!0),t(a[4]).unstable_next(function(){var t=ir.suspense;ir.suspense=void 0===e?null:e;try{i(!1),n()}finally{ir.suspense=t}})},[e,r]),r]}},Hr=null,Br=null,jr=!1;function Qr(e,t){switch(e.tag){case 5:return null!==(t=it(e.type,e.pendingProps))&&(e.stateNode=t,!0);case 6:return null!==(t=it(e.pendingProps))&&(e.stateNode=t,!0);case 13:default:return!1}}function Yr(e){if(jr){var t=Br;if(t){var n=t;if(!Qr(e,t)){if(!(t=it())||!Qr(e,t))return e.effectTag=-1025&e.effectTag|2,jr=!1,void(Hr=e);var r=Hr,i=Vl(5,null,null,0);i.elementType="DELETED",i.type="DELETED",i.stateNode=n,i.return=r,i.effectTag=8,null!==r.lastEffect?(r.lastEffect.nextEffect=i,r.lastEffect=i):r.firstEffect=r.lastEffect=i}Hr=e,Br=it()}else e.effectTag=-1025&e.effectTag|2,jr=!1,Hr=e}}var Vr=t(a[3]).__SECRET_INTERNALS_DO_NOT_USE_OR_YOU_WILL_BE_FIRED.ReactCurrentOwner,Xr=!1;function qr(e,t,n,r){t.child=null===e?Qn(t,null,n,r):jn(t,e.child,n,r)}function $r(e,t,n,r,i){n=n.render;var l=t.ref;return vn(t,i),r=Tr(e,t,n,r,l,i),null===e||Xr?(t.effectTag|=1,qr(e,t,r,i),t.child):(t.updateQueue=e.updateQueue,t.effectTag&=-517,e.expirationTime<=i&&(e.expirationTime=0),di(e,t,i))}function Kr(e,t,n,r,i,l){if(null===e){var a=n.type;return"function"!=typeof a||Xl(a)||void 0!==a.defaultProps||null!==n.compare||void 0!==n.defaultProps?((e=Kl(n.type,null,r,null,t.mode,l)).ref=t.ref,e.return=t,t.child=e):(t.tag=15,t.type=a,Gr(e,t,a,r,i,l))}return a=e.child,it)&&sl.set(e,t))}}function vl(e,t){e.expirationTime(e=e.nextKnownPendingLevel)?t:e:t}function bl(e){if(0!==e.lastExpiredTime)e.callbackExpirationTime=1073741823,e.callbackPriority=99,e.callbackNode=tn(El.bind(null,e));else{var n=yl(e),r=e.callbackNode;if(0===n)null!==r&&(e.callbackNode=null,e.callbackExpirationTime=0,e.callbackPriority=90);else{var i=hl();if(1073741823===n?i=99:1===n||2===n?i=95:i=0>=(i=10*(1073741821-n)-10*(1073741821-i))?99:250>=i?98:5250>=i?97:95,null!==r){var l=e.callbackPriority;if(e.callbackExpirationTime===n&&l>=i)return;r!==Qt&&t(a[4]).unstable_cancelCallback(r)}e.callbackExpirationTime=n,e.callbackPriority=i,n=1073741823===n?tn(El.bind(null,e)):en(i,Tl.bind(null,e),{timeout:10*(1073741821-n)-Kt()}),e.callbackNode=n}}}function Tl(e,t){if(pl=0,t)return ia(e,t=hl()),bl(e),null;var n=yl(e);if(0!==n){if(t=e.callbackNode,(48&Qi)!==Ai)throw Error("Should not already be working.");if(Dl(),e===Yi&&n===Xi||xl(e,n),null!==Vi){var r=Qi;Qi|=Di;for(var i=kl();;)try{Cl();break}catch(t){_l(e,t)}if(pn(),Qi=r,Ui.current=i,qi===Fi)throw 
t=$i,xl(e,n),na(e,n),bl(e),t;if(null===Vi)switch(i=e.finishedWork=e.current.alternate,e.finishedExpirationTime=n,r=qi,Yi=null,r){case Li:case Fi:throw Error("Root did not complete. This is a bug in React.");case Wi:ia(e,2=n){e.lastPingedTime=n,xl(e,n);break}}if(0!==(l=yl(e))&&l!==n)break;if(0!==r&&r!==n){e.lastPingedTime=r;break}e.timeoutHandle=xt(zl.bind(null,e),i);break}zl(e);break;case Bi:if(na(e,n),n===(r=e.lastSuspendedTime)&&(e.nextKnownPendingLevel=Ul(i)),el&&(0===(i=e.lastPingedTime)||i>=n)){e.lastPingedTime=n,xl(e,n);break}if(0!==(i=yl(e))&&i!==n)break;if(0!==r&&r!==n){e.lastPingedTime=r;break}if(1073741823!==Gi?r=10*(1073741821-Gi)-Kt():1073741823===Ki?r=0:(r=10*(1073741821-Ki)-5e3,n=10*(1073741821-n)-(i=Kt()),0>(r=i-r)&&(r=0),n<(r=(120>r?120:480>r?480:1080>r?1080:1920>r?1920:3e3>r?3e3:4320>r?4320:1960*Ii(r/1960))-r)&&(r=n)),10=(r=0|a.busyMinDurationMs)?r=0:(i=0|a.busyDelayMs,r=(l=Kt()-(10*(1073741821-l)-(0|a.timeoutMs||5e3)))<=i?0:i+r-l),10 component higher in the tree to provide a loading indicator or placeholder to display."+Pt(i))}qi!==ji&&(qi=Wi),l=gi(l,i),u=r;do{switch(u.tag){case 3:a=l,u.effectTag|=4096,u.expirationTime=t,kn(u,Ri(u,a,t));break e;case 1:a=l;var v=u.type,y=u.stateNode;if(0==(64&u.effectTag)&&("function"==typeof v.getDerivedStateFromError||null!==y&&"function"==typeof y.componentDidCatch&&(null===al||!al.has(y)))){u.effectTag|=4096,u.expirationTime=t,kn(u,Ci(u,a,t));break e}}u=u.return}while(null!==u)}Vi=Il(Vi)}catch(e){t=e;continue}break}}function kl(){var e=Ui.current;return Ui.current=Lr,null===e?Lr:e}function wl(e,t){eZi&&(Zi=e)}function Rl(){for(;null!==Vi;)Vi=Nl(Vi)}function Cl(){for(;null!==Vi&&!t(a[4]).unstable_shouldYield();)Vi=Nl(Vi)}function Nl(e){var t=Ni(e.alternate,e,Xi);return e.memoizedProps=e.pendingProps,null===t&&(t=Il(e)),zi.current=null,t}function Il(e){Vi=e;do{var t=Vi.alternate;if(e=Vi.return,0==(2048&Vi.effectTag)){e:{var n=t,r=Xi,i=(t=Vi).pendingProps;switch(t.tag){case 2:case 16:break;case 15:case 0:break;case 1:Ot(t.type)&&Lt();break;case 3:Gn(),Ft(),(n=t.stateNode).pendingContext&&(n.context=n.pendingContext,n.pendingContext=null),ii(t);break;case 5:Zn(t);var l=$n(qn.current),a=t.type;if(null!==n&&null!=t.stateNode)li(n,t,a,i,l),n.ref!==t.ref&&(t.effectTag|=128);else if(i){$n(Vn.current),n=t,r=Tt,Tt+=2,a=bt(a);var o=$e(null,We,i,a.validAttributes);l=at(r,a.uiViewClassName,l,o,n),n=new Et(r,a,i,n),ri(n={node:l,canonical:n},t,!1,!1),t.stateNode=n,null!==t.ref&&(t.effectTag|=128)}else if(null===t.stateNode)throw Error("We must have new props for new mounts. This error is likely caused by a bug in React. Please file an issue.");break;case 6:if(n&&null!=t.stateNode)ai(n,t,n.memoizedProps,i);else{if("string"!=typeof i&&null===t.stateNode)throw Error("We must have new props for new mounts. This error is likely caused by a bug in React. 
Please file an issue.");n=$n(qn.current),l=$n(Vn.current),t.stateNode=St(i,n,l,t)}break;case 11:break;case 13:if(Nt(er),i=t.memoizedState,0!=(64&t.effectTag)){t.expirationTime=r;break e}i=null!==i,l=!1,null!==n&&(l=null!==(r=n.memoizedState),i||null===r||null!==(r=n.child.sibling)&&(null!==(a=t.firstEffect)?(t.firstEffect=r,r.nextEffect=a):(t.firstEffect=t.lastEffect=r,r.nextEffect=null),r.effectTag=8)),i&&!l&&0!=(2&t.mode)&&(null===n&&!0!==t.memoizedProps.unstable_avoidThisFallback||0!=(1&er.current)?qi===Li&&(qi=Hi):(qi!==Li&&qi!==Hi||(qi=Bi),0!==Zi&&null!==Yi&&(na(Yi,Xi),ra(Yi,Zi)))),i&&(t.effectTag|=4);break;case 7:case 8:case 12:break;case 4:Gn(),ii(t);break;case 10:mn(t);break;case 9:case 14:break;case 17:Ot(t.type)&&Lt();break;case 19:if(Nt(er),null===(i=t.memoizedState))break;if(l=0!=(64&t.effectTag),null===(a=i.rendering)){if(l)hi(i,!1);else if(qi!==Li||null!==n&&0!=(64&n.effectTag))for(n=t.child;null!==n;){if(null!==(a=tr(n))){for(t.effectTag|=64,hi(i,!1),null!==(n=a.updateQueue)&&(t.updateQueue=n,t.effectTag|=4),null===i.lastEffect&&(t.firstEffect=null),t.lastEffect=i.lastEffect,n=r,i=t.child;null!==i;)r=n,(l=i).effectTag&=2,l.nextEffect=null,l.firstEffect=null,l.lastEffect=null,null===(a=l.alternate)?(l.childExpirationTime=0,l.expirationTime=r,l.child=null,l.memoizedProps=null,l.memoizedState=null,l.updateQueue=null,l.dependencies=null):(l.childExpirationTime=a.childExpirationTime,l.expirationTime=a.expirationTime,l.child=a.child,l.memoizedProps=a.memoizedProps,l.memoizedState=a.memoizedState,l.updateQueue=a.updateQueue,r=a.dependencies,l.dependencies=null===r?null:{expirationTime:r.expirationTime,firstContext:r.firstContext,responders:r.responders}),i=i.sibling;It(er,1&er.current|2),t=t.child;break e}n=n.sibling}}else{if(!l)if(null!==(n=tr(a))){if(t.effectTag|=64,l=!0,null!==(n=n.updateQueue)&&(t.updateQueue=n,t.effectTag|=4),hi(i,!0),null===i.tail&&"hidden"===i.tailMode&&!a.alternate){null!==(t=t.lastEffect=i.lastEffect)&&(t.nextEffect=null);break}}else Kt()>i.tailExpiration&&1i&&(i=r),a>i&&(i=a),l=l.sibling;n.childExpirationTime=i}if(null!==t)return t;null!==e&&0==(2048&e.effectTag)&&(null===e.firstEffect&&(e.firstEffect=Vi.firstEffect),null!==Vi.lastEffect&&(null!==e.lastEffect&&(e.lastEffect.nextEffect=Vi.firstEffect),e.lastEffect=Vi.lastEffect),1(e=e.childExpirationTime)?t:e}function zl(e){var t=Gt();return Zt(99,Al.bind(null,e,t)),null}function Al(e,t){if(Dl(),(48&Qi)!==Ai)throw Error("Should not already be working.");var n=e.finishedWork,r=e.finishedExpirationTime;if(null===n)return null;if(e.finishedWork=null,e.finishedExpirationTime=0,n===e.current)throw Error("Cannot commit the same tree as before. This error is likely caused by a bug in React. 
Please file an issue.");e.callbackNode=null,e.callbackExpirationTime=0,e.callbackPriority=90,e.nextKnownPendingLevel=0;var i=Ul(n);if(e.firstPendingTime=i,r<=e.lastSuspendedTime?e.firstSuspendedTime=e.lastSuspendedTime=e.nextKnownPendingLevel=0:r<=e.firstSuspendedTime&&(e.firstSuspendedTime=r-1),r<=e.lastPingedTime&&(e.lastPingedTime=0),r<=e.lastExpiredTime&&(e.lastExpiredTime=0),e===Yi&&(Vi=Yi=null,Xi=0),1=n?ui(e,t,n):(It(er,1&er.current),null!==(t=di(e,t,n))?t.sibling:null);It(er,1&er.current);break;case 19:if(r=t.childExpirationTime>=n,0!=(64&e.effectTag)){if(r)return fi(e,t,n);t.effectTag|=64}var i=t.memoizedState;if(null!==i&&(i.rendering=null,i.tail=null),It(er,er.current),!r)return null}return di(e,t,n)}Xr=!1}else Xr=!1;switch(t.expirationTime=0,t.tag){case 2:if(r=t.type,null!==e&&(e.alternate=null,t.alternate=null,t.effectTag|=2),e=t.pendingProps,i=Dt(t,zt.current),vn(t,n),i=Tr(null,t,r,e,i,n),t.effectTag|=1,"object"==typeof i&&null!==i&&"function"==typeof i.render&&void 0===i.$$typeof){if(t.tag=1,Er(),Ot(r)){var l=!0;Bt(t)}else l=!1;t.memoizedState=null!==i.state&&void 0!==i.state?i.state:null;var a=r.getDerivedStateFromProps;"function"==typeof a&&zn(t,r,a,e),i.updater=An,t.stateNode=i,i._reactInternalFiber=t,Ln(t,r,e,n),t=ti(null,t,r,!0,l,n)}else t.tag=0,qr(null,t,i,n),t=t.child;return t;case 16:if(i=t.elementType,null!==e&&(e.alternate=null,t.alternate=null,t.effectTag|=2),e=t.pendingProps,ze(i),1!==i._status)throw i._result;switch(i=i._result,t.type=i,l=t.tag=ql(i),e=un(i,e),l){case 0:t=Zr(null,t,i,e,n);break;case 1:t=ei(null,t,i,e,n);break;case 11:t=$r(null,t,i,e,n);break;case 14:t=Kr(null,t,i,un(i.type,e),r,n);break;default:throw Error("Element type is invalid. Received a promise that resolves to: "+i+". Lazy element type must resolve to a class or function.")}return t;case 0:return r=t.type,i=t.pendingProps,Zr(e,t,r,i=t.elementType===r?i:un(r,i),n);case 1:return r=t.type,i=t.pendingProps,ei(e,t,r,i=t.elementType===r?i:un(r,i),n);case 3:if(ni(t),null===(r=t.updateQueue))throw Error("If the root does not have an updateQueue, we should have already bailed out. This error is likely caused by a bug in React. 
Please file an issue.");return i=null!==(i=t.memoizedState)?i.element:null,Rn(t,r,t.pendingProps,null,n),(r=t.memoizedState.element)===i?t=di(e,t,n):(qr(e,t,r,n),t=t.child),t;case 5:return Jn(t),null===e&&Yr(t),r=t.pendingProps.children,Jr(e,t),qr(e,t,r,n),t=t.child;case 6:return null===e&&Yr(t),null;case 13:return ui(e,t,n);case 4:return Kn(t,t.stateNode.containerInfo),r=t.pendingProps,null===e?t.child=jn(t,null,r,n):qr(e,t,r,n),t.child;case 11:return r=t.type,i=t.pendingProps,$r(e,t,r,i=t.elementType===r?i:un(r,i),n);case 7:return qr(e,t,t.pendingProps,n),t.child;case 8:case 12:return qr(e,t,t.pendingProps.children,n),t.child;case 10:e:{if(r=t.type._context,i=t.pendingProps,a=t.memoizedProps,hn(t,l=i.value),null!==a){var o=a.value;if(0===(l=ln(o,l)?0:0|("function"==typeof r._calculateChangedBits?r._calculateChangedBits(o,l):1073741823))){if(a.children===i.children&&!At.current){t=di(e,t,n);break e}}else for(null!==(o=t.child)&&(o.return=t);null!==o;){var u=o.dependencies;if(null!==u){a=o.child;for(var c=u.firstContext;null!==c;){if(c.context===r&&0!=(c.observedBits&l)){1===o.tag&&((c=Sn(n,null)).tag=2,_n(o,c)),o.expirationTime=t&&e<=t}function na(e,t){var n=e.firstSuspendedTime,r=e.lastSuspendedTime;nt||0===n)&&(e.lastSuspendedTime=t),t<=e.lastPingedTime&&(e.lastPingedTime=0),t<=e.lastExpiredTime&&(e.lastExpiredTime=0)}function ra(e,t){t>e.firstPendingTime&&(e.firstPendingTime=t);var n=e.firstSuspendedTime;0!==n&&(t>=n?e.firstSuspendedTime=e.lastSuspendedTime=e.nextKnownPendingLevel=0:t>=e.lastSuspendedTime&&(e.lastSuspendedTime=t+1),t>e.nextKnownPendingLevel&&(e.nextKnownPendingLevel=t))}function ia(e,t){var n=e.lastExpiredTime;(0===n||n>t)&&(e.lastExpiredTime=t)}function la(e){var t=e._reactInternalFiber;if(void 0===t){if("function"==typeof e.render)throw Error("Unable to find node on an unmounted component.");throw Error("Argument appears to not be a ReactComponent. Keys: "+Object.keys(e))}return null===(e=Le(t))?null:e.stateNode}function aa(e,t,n,r){var i=t.current,l=hl(),a=In.suspense;l=ml(l,i,a);e:if(n){n=n._reactInternalFiber;t:{if(Me(n)!==n||1!==n.tag)throw Error("Expected subtree parent to be a mounted class component. This error is likely caused by a bug in React. Please file an issue.");var o=n;do{switch(o.tag){case 3:o=o.stateNode.context;break t;case 1:if(Ot(o.type)){o=o.stateNode.__reactInternalMemoizedMergedChildContext;break t}}o=o.return}while(null!==o);throw Error("Found unexpected detached subtree parent. This error is likely caused by a bug in React. 
Please file an issue.")}if(1===n.tag){var u=n.type;if(Ot(u)){n=Ht(n,u,o);break e}}n=o}else n=Ut;return null===t.context?t.context=n:t.pendingContext=n,(t=Sn(l,a)).payload={element:e},null!==(r=void 0===r?null:r)&&(t.callback=r),_n(i,t),gl(i,l),l}function oa(e,t,n){var r=3=t.length?{done:!0}:{done:!1,value:t[u++]}}}throw new TypeError("Invalid attempt to iterate non-iterable instance.\nIn order to be iterable, non-array objects must have a [Symbol.iterator]() method.")}return(o=t[Symbol.iterator]()).next.bind(o)}function s(t,n){if(t){if("string"==typeof t)return f(t,n);var o=Object.prototype.toString.call(t).slice(8,-1);return"Object"===o&&t.constructor&&(o=t.constructor.name),"Map"===o||"Set"===o?Array.from(t):"Arguments"===o||/^(?:Ui|I)nt(?:8|16|32)(?:Clamped)?Array$/.test(o)?f(t,n):void 0}}function f(t,n){(null==n||n>t.length)&&(n=t.length);for(var o=0,u=new Array(n);oo&&(s+=c&&u?v.currentPageX:c&&!u?v.currentPageY:!c&&u?v.previousPageX:v.previousPageY,h=1);else for(var C=0;C=o){s+=c&&u?l.currentPageX:c&&!u?l.currentPageY:!c&&u?l.previousPageX:l.previousPageY,h++}}return h>0?s/h:n.noCentroid},currentCentroidXOfTouchesChangedAfter:function(t,o){return n.centroidDimension(t,o,!0,!0)},currentCentroidYOfTouchesChangedAfter:function(t,o){return n.centroidDimension(t,o,!1,!0)},previousCentroidXOfTouchesChangedAfter:function(t,o){return n.centroidDimension(t,o,!0,!1)},previousCentroidYOfTouchesChangedAfter:function(t,o){return n.centroidDimension(t,o,!1,!1)},currentCentroidX:function(t){return n.centroidDimension(t,0,!0,!0)},currentCentroidY:function(t){return n.centroidDimension(t,0,!1,!0)},noCentroid:-1};m.exports=n},367,[]); -__d(function(g,r,i,a,m,e,d){'use strict';var n=r(d[0])(r(d[1])),o=r(d[0])(r(d[2])),s=r(d[0])(r(d[3])),t=r(d[0])(r(d[4])),E=r(d[0])(r(d[5])),A=r(d[0])(r(d[6])),u=r(d[0])(r(d[7]));function c(n,o){var s=Object.keys(n);if(Object.getOwnPropertySymbols){var t=Object.getOwnPropertySymbols(n);o&&(t=t.filter(function(o){return Object.getOwnPropertyDescriptor(n,o).enumerable})),s.push.apply(s,t)}return s}function _(o){for(var s=1;s1&&void 0!==arguments[1]?arguments[1]:{};return r(d[5])('object'==typeof t&&null!==t,'Content to share must be a valid object'),r(d[5])('string'==typeof t.url||'string'==typeof t.message,'At least one of URL and message is required'),r(d[5])('object'==typeof n&&null!==n,'Options must be a valid object'),new Promise(function(s,u){var c=r(d[6])(n.tintColor);r(d[5])(o.default,'NativeActionSheetManager is not registered on iOS, but it should be.'),o.default.showShareActionSheetWithOptions({message:'string'==typeof t.message?t.message:void 0,url:'string'==typeof t.url?t.url:void 0,subject:n.subject,tintColor:null!=c?c:void 0,excludedActivityTypes:n.excludedActivityTypes},function(t){return u(t)},function(t,n){s(t?{action:'sharedAction',activityType:n}:{action:'dismissedAction'})})})}}]),s})());s.sharedAction='sharedAction',s.dismissedAction='dismissedAction',m.exports=s},374,[1,5,6,333,375,25,114]); -__d(function(g,r,i,a,m,e,d){'use strict';Object.defineProperty(e,"__esModule",{value:!0}),e.default=void 0;var t=r(d[0])(r(d[1])).get('ShareModule');e.default=t},375,[13,24]); -__d(function(g,r,i,a,m,e,d){'use strict';var t=r(d[0])(r(d[1])),n=r(d[0])(r(d[2])),u=r(d[0])(r(d[3])),c=r(d[0])(r(d[4])),f=r(d[0])(r(d[5]));function o(){if("undefined"==typeof Reflect||!Reflect.construct)return!1;if(Reflect.construct.sham)return!1;if("function"==typeof Proxy)return!0;try{return Date.prototype.toString.call(Reflect.construct(Date,[],function(){})),!0}catch(t){return!1}}var 
l=(function(f){(0,n.default)(h,f);var l,s,p=(l=h,s=o(),function(){var t,n=(0,c.default)(l);if(s){var f=(0,c.default)(this).constructor;t=Reflect.construct(n,arguments,f)}else t=n.apply(this,arguments);return(0,u.default)(this,t)});function h(){return(0,t.default)(this,h),p.apply(this,arguments)}return h})(r(d[6]));m.exports=new l(f.default)},376,[1,5,7,9,12,324,160]); -__d(function(g,r,i,a,m,e,d){'use strict';var t={show:function(t,o){r(d[0])(!1,'ToastAndroid is not supported on this platform.')},showWithGravity:function(t,o,s){r(d[0])(!1,'ToastAndroid is not supported on this platform.')},showWithGravityAndOffset:function(t,o,s,n,p){r(d[0])(!1,'ToastAndroid is not supported on this platform.')}};m.exports=t},377,[96]); -__d(function(g,r,i,a,m,e,d){'use strict';Object.defineProperty(e,"__esModule",{value:!0}),e.default=function(){var n=(0,r(d[2]).useMemo)(function(){return{getCurrentValue:function(){return t.default.getColorScheme()},subscribe:function(n){return t.default.addChangeListener(n),function(){return t.default.removeChangeListener(n)}}}},[]);return(0,r(d[3]).useSubscription)(n)};var t=r(d[0])(r(d[1]))},378,[1,334,14,379]); -__d(function(g,r,i,a,m,e,d){'use strict';m.exports=r(d[0])},379,[380]); -__d(function(g,r,i,a,m,e,d){'use strict';e.useSubscription=function(u){var t=u.getCurrentValue,n=u.subscribe,s=r(d[0]).useState(function(){return{getCurrentValue:t,subscribe:n,value:t()}});u=s[0];var c=s[1];return s=u.value,u.getCurrentValue===t&&u.subscribe===n||(s=t(),c({getCurrentValue:t,subscribe:n,value:s})),r(d[0]).useDebugValue(s),r(d[0]).useEffect(function(){function u(){if(!s){var u=t();c(function(s){return s.getCurrentValue!==t||s.subscribe!==n||s.value===u?s:r(d[1])({},s,{value:u})})}}var s=!1,b=n(u);return u(),function(){s=!0,b()}},[t,n]),s}},380,[14,16]); -__d(function(g,r,i,a,m,e,d){'use strict';Object.defineProperty(e,"__esModule",{value:!0}),e.default=function(){var u=(0,r(d[3]).useState)(function(){return n.default.get('window')}),f=(0,t.default)(u,2),o=f[0],c=f[1];return(0,r(d[3]).useEffect)(function(){function t(t){var n=t.window;c(n)}return n.default.addEventListener('change',t),c(n.default.get('window')),function(){n.default.removeEventListener('change',t)}},[]),o};var t=r(d[0])(r(d[1])),n=r(d[0])(r(d[2]))},381,[1,27,123,14]); -__d(function(g,r,i,a,m,e,d){'use strict';var t=r(d[0])(r(d[1])),n=!1,o=0,u=400;function f(f){var v=arguments.length>1&&void 0!==arguments[1]&&arguments[1];n||(n=!0,0===f[0]&&(t.default.vibrate(u),f=f.slice(1)),0!==f.length?setTimeout(function(){return l(++o,f,v,1)},f[0]):n=!1)}function l(f,v,c,s){if(n&&f===o){if(t.default.vibrate(u),s>=v.length){if(!c)return void(n=!1);s=0}setTimeout(function(){return l(f,v,c,s+1)},v[s])}}var v={vibrate:function(){var o=arguments.length>0&&void 0!==arguments[0]?arguments[0]:u,l=arguments.length>1&&void 0!==arguments[1]&&arguments[1];if(!n)if('number'==typeof o)t.default.vibrate(o);else{if(!Array.isArray(o))throw new Error('Vibration pattern should be a number or array');f(o,l)}},cancel:function(){n=!1}};m.exports=v},382,[1,383]); -__d(function(g,r,i,a,m,e,d){'use strict';Object.defineProperty(e,"__esModule",{value:!0}),e.default=void 0;var t=r(d[0])(r(d[1])).getEnforcing('Vibration');e.default=t},383,[13,24]); -__d(function(g,r,i,a,m,e,d){'use strict';var t,n=r(d[0])(r(d[1])),u=r(d[0])(r(d[2])),o=r(d[0])(r(d[3])),c=r(d[0])(r(d[4])),l=r(d[0])(r(d[5]));r(d[0])(r(d[6])),r(d[7])(r(d[8])),r(d[0])(r(d[9]));function f(){if("undefined"==typeof 
Reflect||!Reflect.construct)return!1;if(Reflect.construct.sham)return!1;if("function"==typeof Proxy)return!0;try{return Date.prototype.toString.call(Reflect.construct(Date,[],function(){})),!0}catch(t){return!1}}t=(function(t){(0,o.default)(v,t);var s,y,p=(s=v,y=f(),function(){var t,n=(0,l.default)(s);if(y){var u=(0,l.default)(this).constructor;t=Reflect.construct(n,arguments,u)}else t=n.apply(this,arguments);return(0,c.default)(this,t)});function v(){return(0,n.default)(this,v),p.apply(this,arguments)}return(0,u.default)(v,[{key:"render",value:function(){return null}}],[{key:"ignoreWarnings",value:function(t){}},{key:"install",value:function(){}},{key:"uninstall",value:function(){}},{key:"__unstable_enableLogBox",value:function(){}},{key:"__unstable_isLogBoxEnabled",value:function(){return!1}}]),v})(r(d[10]).Component),m.exports=t},384,[1,5,6,7,9,12,385,13,66,77,14]); -__d(function(g,r,i,a,m,e,d){'use strict';function t(t,n){var s=Object.keys(t);if(Object.getOwnPropertySymbols){var c=Object.getOwnPropertySymbols(t);n&&(c=c.filter(function(n){return Object.getOwnPropertyDescriptor(t,n).enumerable})),s.push.apply(s,c)}return s}function n(n){for(var s=1;s=t.length?{done:!0}:{done:!1,value:t[u++]}}}throw new TypeError("Invalid attempt to iterate non-iterable instance.\nIn order to be iterable, non-array objects must have a [Symbol.iterator]() method.")}return(s=t[Symbol.iterator]()).next.bind(s)}function n(t,n){if(t){if("string"==typeof t)return o(t,n);var s=Object.prototype.toString.call(t).slice(8,-1);return"Object"===s&&t.constructor&&(s=t.constructor.name),"Map"===s||"Set"===s?Array.from(t):"Arguments"===s||/^(?:Ui|I)nt(?:8|16|32)(?:Clamped)?Array$/.test(s)?o(t,n):void 0}}function o(t,n){(null==n||n>t.length)&&(n=t.length);for(var o=0,s=new Array(n);ot){var p=o.substr(t,u.offset-t);s.push(r(d[3]).createElement(r(d[4]),{key:l},p))}var c=o.substr(u.offset,u.length);return s.push(r(d[3]).createElement(r(d[4]),{key:l+'.5',style:n},c)),u.offset+u.length},0);if(u=t.length?{done:!0}:{done:!1,value:t[c++]}}}throw new TypeError("Invalid attempt to iterate non-iterable instance.\nIn order to be iterable, non-array objects must have a [Symbol.iterator]() method.")}return(l=t[Symbol.iterator]()).next.bind(l)}function n(t,n){if(t){if("string"==typeof t)return o(t,n);var l=Object.prototype.toString.call(t).slice(8,-1);return"Object"===l&&t.constructor&&(l=t.constructor.name),"Map"===l||"Set"===l?Array.from(t):"Arguments"===l||/^(?:Ui|I)nt(?:8|16|32)(?:Clamped)?Array$/.test(l)?o(t,n):void 0}}function o(t,n){(null==n||n>t.length)&&(n=t.length);for(var o=0,l=new Array(n);o=t.length?{done:!0}:{done:!1,value:t[s++]}}}throw new TypeError("Invalid attempt to iterate non-iterable instance.\nIn order to be iterable, non-array objects must have a [Symbol.iterator]() method.")}return(o=t[Symbol.iterator]()).next.bind(o)}function c(t,n){if(t){if("string"==typeof t)return f(t,n);var o=Object.prototype.toString.call(t).slice(8,-1);return"Object"===o&&t.constructor&&(o=t.constructor.name),"Map"===o||"Set"===o?Array.from(t):"Arguments"===o||/^(?:Ui|I)nt(?:8|16|32)(?:Clamped)?Array$/.test(o)?f(t,n):void 0}}function f(t,n){(null==n||n>t.length)&&(n=t.length);for(var o=0,s=new Array(n);o/test/**/*.test.js'], - testTimeout: 120000, + testTimeout: 240000, maxWorkers: 1, globalSetup: 'detox/runners/jest/globalSetup', globalTeardown: 'detox/runners/jest/globalTeardown', diff --git a/js/react_native/e2e/yarn.lock b/js/react_native/e2e/yarn.lock index a2f46e8e376c2..0adce305cd785 100644 --- a/js/react_native/e2e/yarn.lock +++ 
b/js/react_native/e2e/yarn.lock
@@ -47,6 +47,13 @@
     "@babel/highlight" "^7.22.13"
     chalk "^2.4.2"
 
+"@babel/code-frame@~7.10.4":
+  version "7.10.4"
+  resolved "https://registry.yarnpkg.com/@babel/code-frame/-/code-frame-7.10.4.tgz#168da1a36e90da68ae8d49c0f1b48c7c6249213a"
+  integrity sha512-vG6SvB6oYEhvgisZNFRmRCUkLz11c7rp+tbNTynGqc6mS1d5ATd/sGyV6W0KZZnXRKMTzZDRgQT3Ou9jhpAfUg==
+  dependencies:
+    "@babel/highlight" "^7.10.4"
+
 "@babel/compat-data@^7.13.11", "@babel/compat-data@^7.17.10":
   version "7.18.5"
   resolved "https://registry.yarnpkg.com/@babel/compat-data/-/compat-data-7.18.5.tgz#acac0c839e317038c73137fbb6ef71a1d6238471"
@@ -445,6 +452,11 @@
   resolved "https://registry.yarnpkg.com/@babel/helper-validator-identifier/-/helper-validator-identifier-7.22.20.tgz#c4ae002c61d2879e724581d96665583dbc1dc0e0"
   integrity sha512-Y4OZ+ytlatR8AI+8KZfKuL5urKp7qey08ha31L8b3BwewJAoJamTzyvxPR/5D+KkdJCGPq/+8TukHBlY10FX9A==
 
+"@babel/helper-validator-identifier@^7.25.9":
+  version "7.25.9"
+  resolved "https://registry.yarnpkg.com/@babel/helper-validator-identifier/-/helper-validator-identifier-7.25.9.tgz#24b64e2c3ec7cd3b3c547729b8d16871f22cbdc7"
+  integrity sha512-Ed61U6XJc3CVRfkERJWDz4dJwKe7iLmmJsbOGu9wSloNSFttHV0I8g6UAgb7qnK5ly5bGLPd4oXZlxCdANBOWQ==
+
 "@babel/helper-validator-option@^7.16.7":
   version "7.16.7"
   resolved "https://registry.yarnpkg.com/@babel/helper-validator-option/-/helper-validator-option-7.16.7.tgz#b203ce62ce5fe153899b617c08957de860de4d23"
@@ -493,6 +505,16 @@
     "@babel/traverse" "^7.21.0"
     "@babel/types" "^7.21.0"
 
+"@babel/highlight@^7.10.4":
+  version "7.25.9"
+  resolved "https://registry.yarnpkg.com/@babel/highlight/-/highlight-7.25.9.tgz#8141ce68fc73757946f983b343f1231f4691acc6"
+  integrity sha512-llL88JShoCsth8fF8R4SJnIn+WLvR6ccFxu1H3FlMhDontdcmZWf2HgIZ7AIqV3Xcck1idlohrN4EUBQz6klbw==
+  dependencies:
+    "@babel/helper-validator-identifier" "^7.25.9"
+    chalk "^2.4.2"
+    js-tokens "^4.0.0"
+    picocolors "^1.0.0"
+
 "@babel/highlight@^7.16.7":
   version "7.17.12"
   resolved "https://registry.yarnpkg.com/@babel/highlight/-/highlight-7.17.12.tgz#257de56ee5afbd20451ac0a75686b6b404257351"
@@ -857,13 +879,6 @@
     "@babel/helper-create-regexp-features-plugin" "^7.18.6"
     "@babel/helper-plugin-utils" "^7.18.6"
 
-"@babel/plugin-transform-object-assign@^7.0.0":
-  version "7.16.7"
-  resolved "https://registry.yarnpkg.com/@babel/plugin-transform-object-assign/-/plugin-transform-object-assign-7.16.7.tgz#5fe08d63dccfeb6a33aa2638faf98e5c584100f8"
-  integrity sha512-R8mawvm3x0COTJtveuoqZIjNypn2FjfvXZr4pSQ8VhEFBuQGBz4XhHasZtHXjgXU4XptZ4HtGof3NoYc93ZH9Q==
-  dependencies:
-    "@babel/helper-plugin-utils" "^7.16.7"
-
 "@babel/plugin-transform-object-super@^7.0.0":
   version "7.16.7"
   resolved "https://registry.yarnpkg.com/@babel/plugin-transform-object-super/-/plugin-transform-object-super-7.16.7.tgz#ac359cf8d32cf4354d27a46867999490b6c32a94"
@@ -918,14 +933,6 @@
     "@babel/plugin-syntax-jsx" "^7.17.12"
     "@babel/types" "^7.17.12"
 
-"@babel/plugin-transform-regenerator@^7.0.0":
-  version "7.18.0"
-  resolved "https://registry.yarnpkg.com/@babel/plugin-transform-regenerator/-/plugin-transform-regenerator-7.18.0.tgz#44274d655eb3f1af3f3a574ba819d3f48caf99d5"
-  integrity sha512-C8YdRw9uzx25HSIzwA7EM7YP0FhCe5wNvJbZzjVNHHPGVcDJ3Aie+qGYYdS1oVQgn+B3eAIJbWFLrJ4Jipv7nw==
-  dependencies:
-    "@babel/helper-plugin-utils" "^7.17.12"
-    regenerator-transform "^0.15.0"
-
 "@babel/plugin-transform-runtime@^7.0.0":
   version "7.18.5"
   resolved "https://registry.yarnpkg.com/@babel/plugin-transform-runtime/-/plugin-transform-runtime-7.18.5.tgz#f4d3188ba6a8815793993c71c2c225d0ee1d7743"
@@ -1020,7 +1027,7 @@
   dependencies:
     regenerator-runtime "^0.13.4"
 
-"@babel/runtime@^7.17.0", "@babel/runtime@^7.8.4":
+"@babel/runtime@^7.17.0":
   version "7.18.3"
   resolved "https://registry.yarnpkg.com/@babel/runtime/-/runtime-7.18.3.tgz#c7b654b57f6f63cf7f8b418ac9ca04408c4579f4"
   integrity sha512-38Y8f7YUhce/K7RMwTp7m0uCumpv9hZkitCbBClqQIow1qSbCvGkcegKOXpEWCQLfWmevgRiWokZ1GkpfhbZug==
@@ -1118,6 +1125,77 @@
   resolved "https://registry.yarnpkg.com/@bcoe/v8-coverage/-/v8-coverage-0.2.3.tgz#75a2e8b51cb758a7553d6804a5932d7aace75c39"
   integrity sha512-0hYQ8SB4Db5zvZB4axdMHGwEaQjkZzFjQiN9LVYvIFB2nSUHW9tYpxWriPrWDASIxiaXax83REcLxuSdnGPZtw==
 
+"@expo/config-plugins@^7.2.4":
+  version "7.9.2"
+  resolved "https://registry.yarnpkg.com/@expo/config-plugins/-/config-plugins-7.9.2.tgz#fc18e84761067ce670742d735b888613c06fbea8"
+  integrity sha512-sRU/OAp7kJxrCUiCTUZqvPMKPdiN1oTmNfnbkG4oPdfWQTpid3jyCH7ZxJEN5SI6jrY/ZsK5B/JPgjDUhuWLBQ==
+  dependencies:
+    "@expo/config-types" "^50.0.0-alpha.1"
+    "@expo/fingerprint" "^0.6.0"
+    "@expo/json-file" "~8.3.0"
+    "@expo/plist" "^0.1.0"
+    "@expo/sdk-runtime-versions" "^1.0.0"
+    "@react-native/normalize-color" "^2.0.0"
+    chalk "^4.1.2"
+    debug "^4.3.1"
+    find-up "~5.0.0"
+    getenv "^1.0.0"
+    glob "7.1.6"
+    resolve-from "^5.0.0"
+    semver "^7.5.3"
+    slash "^3.0.0"
+    slugify "^1.6.6"
+    xcode "^3.0.1"
+    xml2js "0.6.0"
+
+"@expo/config-types@^50.0.0-alpha.1":
+  version "50.0.1"
+  resolved "https://registry.yarnpkg.com/@expo/config-types/-/config-types-50.0.1.tgz#12d889214dedf64fbf2322c9d9e75c9d5ca7f695"
+  integrity sha512-EZHMgzkWRB9SMHO1e9m8s+OMahf92XYTnsCFjxhSfcDrcEoSdFPyJWDJVloHZPMGhxns7Fi2+A+bEVN/hD4NKA==
+
+"@expo/fingerprint@^0.6.0":
+  version "0.6.1"
+  resolved "https://registry.yarnpkg.com/@expo/fingerprint/-/fingerprint-0.6.1.tgz#763ae79b06f60e10853596bfa2bd730bfb13f2b0"
+  integrity sha512-ggLn6unI6qowlA1FihdQwPpLn16VJulYkvYAEL50gaqVahfNEglRQMSH2giZzjD0d6xq2/EQuUdFyHaJfyJwOQ==
+  dependencies:
+    "@expo/spawn-async" "^1.5.0"
+    chalk "^4.1.2"
+    debug "^4.3.4"
+    find-up "^5.0.0"
+    minimatch "^3.0.4"
+    p-limit "^3.1.0"
+    resolve-from "^5.0.0"
+
+"@expo/json-file@~8.3.0":
+  version "8.3.3"
+  resolved "https://registry.yarnpkg.com/@expo/json-file/-/json-file-8.3.3.tgz#7926e3592f76030ce63d6b1308ac8f5d4d9341f4"
+  integrity sha512-eZ5dld9AD0PrVRiIWpRkm5aIoWBw3kAyd8VkuWEy92sEthBKDDDHAnK2a0dw0Eil6j7rK7lS/Qaq/Zzngv2h5A==
+  dependencies:
+    "@babel/code-frame" "~7.10.4"
+    json5 "^2.2.2"
+    write-file-atomic "^2.3.0"
+
+"@expo/plist@^0.1.0":
+  version "0.1.3"
+  resolved "https://registry.yarnpkg.com/@expo/plist/-/plist-0.1.3.tgz#b4fbee2c4f7a88512a4853d85319f4d95713c529"
+  integrity sha512-GW/7hVlAylYg1tUrEASclw1MMk9FP4ZwyFAY/SUTJIhPDQHtfOlXREyWV3hhrHdX/K+pS73GNgdfT6E/e+kBbg==
+  dependencies:
+    "@xmldom/xmldom" "~0.7.7"
+    base64-js "^1.2.3"
+    xmlbuilder "^14.0.0"
+
+"@expo/sdk-runtime-versions@^1.0.0":
+  version "1.0.0"
+  resolved "https://registry.yarnpkg.com/@expo/sdk-runtime-versions/-/sdk-runtime-versions-1.0.0.tgz#d7ebd21b19f1c6b0395e50d78da4416941c57f7c"
+  integrity sha512-Doz2bfiPndXYFPMRwPyGa1k5QaKDVpY806UJj570epIiMzWaYyCtobasyfC++qfIXVb5Ocy7r3tP9d62hAQ7IQ==
+
+"@expo/spawn-async@^1.5.0":
+  version "1.7.2"
+  resolved "https://registry.yarnpkg.com/@expo/spawn-async/-/spawn-async-1.7.2.tgz#fcfe66c3e387245e72154b1a7eae8cada6a47f58"
+  integrity sha512-QdWi16+CHB9JYP7gma19OVVg0BFkvU8zNj9GjWorYI8Iv8FUxjOCcYRuAmX4s/h91e4e7BPsskc8cSrZYho9Ew==
+  dependencies:
+    cross-spawn "^7.0.3"
+
 "@hapi/hoek@^9.0.0":
   version "9.3.0"
   resolved "https://registry.yarnpkg.com/@hapi/hoek/-/hoek-9.3.0.tgz#8368869dcb735be2e7f5cb7647de78e167a251fb"
@@ -1439,42 +1517,42 @@
     "@jridgewell/resolve-uri" "^3.0.3"
     "@jridgewell/sourcemap-codec" "^1.4.10"
 
-"@react-native-community/cli-clean@^8.0.4":
-  version "8.0.4"
-  resolved "https://registry.yarnpkg.com/@react-native-community/cli-clean/-/cli-clean-8.0.4.tgz#97e16a20e207b95de12e29b03816e8f2b2c80cc7"
-  integrity sha512-IwS1M1NHg6+qL8PThZYMSIMYbZ6Zbx+lIck9PLBskbosFo24M3lCOflOl++Bggjakp6mR+sRXxLMexid/GeOsQ==
+"@react-native-community/cli-clean@^9.2.1":
+  version "9.2.1"
+  resolved "https://registry.yarnpkg.com/@react-native-community/cli-clean/-/cli-clean-9.2.1.tgz#198c5dd39c432efb5374582073065ff75d67d018"
+  integrity sha512-dyNWFrqRe31UEvNO+OFWmQ4hmqA07bR9Ief/6NnGwx67IO9q83D5PEAf/o96ML6jhSbDwCmpPKhPwwBbsyM3mQ==
   dependencies:
-    "@react-native-community/cli-tools" "^8.0.4"
+    "@react-native-community/cli-tools" "^9.2.1"
     chalk "^4.1.2"
     execa "^1.0.0"
     prompts "^2.4.0"
 
-"@react-native-community/cli-config@^8.0.6":
-  version "8.0.6"
-  resolved "https://registry.yarnpkg.com/@react-native-community/cli-config/-/cli-config-8.0.6.tgz#041eee7dd8fdef595bf7a3f24228c173bf294a44"
-  integrity sha512-mjVpVvdh8AviiO8xtqeX+BkjqE//NMDnISwsLWSJUfNCwTAPmdR8PGbhgP5O4hWHyJ3WkepTopl0ya7Tfi3ifw==
+"@react-native-community/cli-config@^9.2.1":
+  version "9.2.1"
+  resolved "https://registry.yarnpkg.com/@react-native-community/cli-config/-/cli-config-9.2.1.tgz#54eb026d53621ccf3a9df8b189ac24f6e56b8750"
+  integrity sha512-gHJlBBXUgDN9vrr3aWkRqnYrPXZLztBDQoY97Mm5Yo6MidsEpYo2JIP6FH4N/N2p1TdjxJL4EFtdd/mBpiR2MQ==
   dependencies:
-    "@react-native-community/cli-tools" "^8.0.4"
+    "@react-native-community/cli-tools" "^9.2.1"
     cosmiconfig "^5.1.0"
     deepmerge "^3.2.0"
     glob "^7.1.3"
     joi "^17.2.1"
 
-"@react-native-community/cli-debugger-ui@^8.0.0":
-  version "8.0.0"
-  resolved "https://registry.yarnpkg.com/@react-native-community/cli-debugger-ui/-/cli-debugger-ui-8.0.0.tgz#98263dc525e65015e2d6392c940114028f87e8e9"
-  integrity sha512-u2jq06GZwZ9sRERzd9FIgpW6yv4YOW4zz7Ym/B8eSzviLmy3yI/8mxJtvlGW+J8lBsfMcQoqJpqI6Rl1nZy9yQ==
+"@react-native-community/cli-debugger-ui@^9.0.0":
+  version "9.0.0"
+  resolved "https://registry.yarnpkg.com/@react-native-community/cli-debugger-ui/-/cli-debugger-ui-9.0.0.tgz#ea5c5dad6008bccd840d858e160d42bb2ced8793"
+  integrity sha512-7hH05ZwU9Tp0yS6xJW0bqcZPVt0YCK7gwj7gnRu1jDNN2kughf6Lg0Ys29rAvtZ7VO1PK5c1O+zs7yFnylQDUA==
   dependencies:
     serve-static "^1.13.1"
 
-"@react-native-community/cli-doctor@^8.0.6":
-  version "8.0.6"
-  resolved "https://registry.yarnpkg.com/@react-native-community/cli-doctor/-/cli-doctor-8.0.6.tgz#954250155ab2f3a66a54821e071bc4a631d2dfff"
-  integrity sha512-ZQqyT9mJMVeFEVIwj8rbDYGCA2xXjJfsQjWk2iTRZ1CFHfhPSUuUiG8r6mJmTinAP9t+wYcbbIYzNgdSUKnDMw==
+"@react-native-community/cli-doctor@^9.3.0":
+  version "9.3.0"
+  resolved "https://registry.yarnpkg.com/@react-native-community/cli-doctor/-/cli-doctor-9.3.0.tgz#8817a3fd564453467def5b5bc8aecdc4205eff50"
+  integrity sha512-/fiuG2eDGC2/OrXMOWI5ifq4X1gdYTQhvW2m0TT5Lk1LuFiZsbTCp1lR+XILKekuTvmYNjEGdVpeDpdIWlXdEA==
   dependencies:
-    "@react-native-community/cli-config" "^8.0.6"
-    "@react-native-community/cli-platform-ios" "^8.0.6"
-    "@react-native-community/cli-tools" "^8.0.4"
+    "@react-native-community/cli-config" "^9.2.1"
+    "@react-native-community/cli-platform-ios" "^9.3.0"
+    "@react-native-community/cli-tools" "^9.2.1"
     chalk "^4.1.2"
     command-exists "^1.2.8"
     envinfo "^7.7.2"
@@ -1489,69 +1567,64 @@
     sudo-prompt "^9.0.0"
     wcwidth "^1.0.1"
 
-"@react-native-community/cli-hermes@^8.0.5":
-  version "8.0.5"
-  resolved "https://registry.yarnpkg.com/@react-native-community/cli-hermes/-/cli-hermes-8.0.5.tgz#639edc6b0ce73f705e4b737e3de1cc47d42516ff"
-  integrity sha512-Zm0wM6SfgYAEX1kfJ1QBvTayabvh79GzmjHyuSnEROVNPbl4PeCG4WFbwy489tGwOP9Qx9fMT5tRIFCD8bp6/g==
+"@react-native-community/cli-hermes@^9.3.4":
+  version "9.3.4"
+  resolved "https://registry.yarnpkg.com/@react-native-community/cli-hermes/-/cli-hermes-9.3.4.tgz#47851847c4990272687883bd8bf53733d5f3c341"
+  integrity sha512-VqTPA7kknCXgtYlRf+sDWW4yxZ6Gtg1Ga+Rdrn1qSKuo09iJ8YKPoQYOu5nqbIYJQAEhorWQyo1VvNgd0wd49w==
   dependencies:
-    "@react-native-community/cli-platform-android" "^8.0.5"
-    "@react-native-community/cli-tools" "^8.0.4"
+    "@react-native-community/cli-platform-android" "^9.3.4"
+    "@react-native-community/cli-tools" "^9.2.1"
     chalk "^4.1.2"
     hermes-profile-transformer "^0.0.6"
     ip "^1.1.5"
 
-"@react-native-community/cli-platform-android@^8.0.4", "@react-native-community/cli-platform-android@^8.0.5":
-  version "8.0.5"
-  resolved "https://registry.yarnpkg.com/@react-native-community/cli-platform-android/-/cli-platform-android-8.0.5.tgz#da11d2678adeca98e83494d68de80e50571b4af4"
-  integrity sha512-z1YNE4T1lG5o9acoQR1GBvf7mq6Tzayqo/za5sHVSOJAC9SZOuVN/gg/nkBa9a8n5U7qOMFXfwhTMNqA474gXA==
+"@react-native-community/cli-platform-android@9.3.4", "@react-native-community/cli-platform-android@^9.3.4":
+  version "9.3.4"
+  resolved "https://registry.yarnpkg.com/@react-native-community/cli-platform-android/-/cli-platform-android-9.3.4.tgz#42f22943b6ee15713add6af8608c1d0ebf79d774"
+  integrity sha512-BTKmTMYFuWtMqimFQJfhRyhIWw1m+5N5svR1S5+DqPcyFuSXrpNYDWNSFR8E105xUbFANmsCZZQh6n1WlwMpOA==
   dependencies:
-    "@react-native-community/cli-tools" "^8.0.4"
+    "@react-native-community/cli-tools" "^9.2.1"
     chalk "^4.1.2"
     execa "^1.0.0"
     fs-extra "^8.1.0"
     glob "^7.1.3"
-    jetifier "^1.6.2"
-    lodash "^4.17.15"
     logkitty "^0.7.1"
     slash "^3.0.0"
 
-"@react-native-community/cli-platform-ios@^8.0.4", "@react-native-community/cli-platform-ios@^8.0.6":
-  version "8.0.6"
-  resolved "https://registry.yarnpkg.com/@react-native-community/cli-platform-ios/-/cli-platform-ios-8.0.6.tgz#ab80cd4eb3014b8fcfc9bd1b53ec0a9f8e5d1430"
-  integrity sha512-CMR6mu/LVx6JVfQRDL9uULsMirJT633bODn+IrYmrwSz250pnhON16We8eLPzxOZHyDjm7JPuSgHG3a/BPiRuQ==
+"@react-native-community/cli-platform-ios@9.3.0", "@react-native-community/cli-platform-ios@^9.3.0":
+  version "9.3.0"
+  resolved "https://registry.yarnpkg.com/@react-native-community/cli-platform-ios/-/cli-platform-ios-9.3.0.tgz#45abde2a395fddd7cf71e8b746c1dc1ee2260f9a"
+  integrity sha512-nihTX53BhF2Q8p4B67oG3RGe1XwggoGBrMb6vXdcu2aN0WeXJOXdBLgR900DAA1O8g7oy1Sudu6we+JsVTKnjw==
   dependencies:
-    "@react-native-community/cli-tools" "^8.0.4"
+    "@react-native-community/cli-tools" "^9.2.1"
     chalk "^4.1.2"
     execa "^1.0.0"
     glob "^7.1.3"
-    js-yaml "^3.13.1"
-    lodash "^4.17.15"
    ora "^5.4.1"
-    plist "^3.0.2"
 
-"@react-native-community/cli-plugin-metro@^8.0.4":
-  version "8.0.4"
-  resolved "https://registry.yarnpkg.com/@react-native-community/cli-plugin-metro/-/cli-plugin-metro-8.0.4.tgz#a364a50a2e05fc5d0b548759e499e5b681b6e4cc"
-  integrity sha512-UWzY1eMcEr/6262R2+d0Is5M3L/7Y/xXSDIFMoc5Rv5Wucl3hJM/TxHXmByvHpuJf6fJAfqOskyt4bZCvbI+wQ==
+"@react-native-community/cli-plugin-metro@^9.3.3":
+  version "9.3.3"
+  resolved "https://registry.yarnpkg.com/@react-native-community/cli-plugin-metro/-/cli-plugin-metro-9.3.3.tgz#330d7b9476a3fdabdd5863f114fa962289e280dc"
+  integrity sha512-lPBw6XieNdj2AbWDN0Rc+jNOx8hBgSQyv0gUAm01qtJe4I9FjSMU6nOGTxMpWpICo6TYl/cmPGXOzbfpwxwtkQ==
   dependencies:
-    "@react-native-community/cli-server-api" "^8.0.4"
-    "@react-native-community/cli-tools" "^8.0.4"
+    "@react-native-community/cli-server-api" "^9.2.1"
+    "@react-native-community/cli-tools" "^9.2.1"
     chalk "^4.1.2"
-    metro "^0.70.1"
-    metro-config "^0.70.1"
-    metro-core "^0.70.1"
-    metro-react-native-babel-transformer "^0.70.1"
-    metro-resolver "^0.70.1"
-    metro-runtime "^0.70.1"
+    metro "0.72.4"
+    metro-config "0.72.4"
+    metro-core "0.72.4"
+    metro-react-native-babel-transformer "0.72.4"
+    metro-resolver "0.72.4"
+    metro-runtime "0.72.4"
     readline "^1.3.0"
 
-"@react-native-community/cli-server-api@^8.0.4":
-  version "8.0.4"
-  resolved "https://registry.yarnpkg.com/@react-native-community/cli-server-api/-/cli-server-api-8.0.4.tgz#d45d895a0a6e8b960c9d677188d414a996faa4d3"
-  integrity sha512-Orr14njx1E70CVrUA8bFdl+mrnbuXUjf1Rhhm0RxUadFpvkHuOi5dh8Bryj2MKtf8eZrpEwZ7tuQPhJEULW16A==
+"@react-native-community/cli-server-api@^9.2.1":
+  version "9.2.1"
+  resolved "https://registry.yarnpkg.com/@react-native-community/cli-server-api/-/cli-server-api-9.2.1.tgz#41ac5916b21d324bccef447f75600c03b2f54fbe"
+  integrity sha512-EI+9MUxEbWBQhWw2PkhejXfkcRqPl+58+whlXJvKHiiUd7oVbewFs0uLW0yZffUutt4FGx6Uh88JWEgwOzAdkw==
   dependencies:
-    "@react-native-community/cli-debugger-ui" "^8.0.0"
-    "@react-native-community/cli-tools" "^8.0.4"
+    "@react-native-community/cli-debugger-ui" "^9.0.0"
+    "@react-native-community/cli-tools" "^9.2.1"
     compression "^1.7.1"
     connect "^3.6.5"
     errorhandler "^1.5.0"
@@ -1560,15 +1633,14 @@
     serve-static "^1.13.1"
     ws "^7.5.1"
 
-"@react-native-community/cli-tools@^8.0.4":
-  version "8.0.4"
-  resolved "https://registry.yarnpkg.com/@react-native-community/cli-tools/-/cli-tools-8.0.4.tgz#994b9d56c84472491c876b71acd4356773fcbe65"
-  integrity sha512-ePN9lGxh6LRFiotyddEkSmuqpQhnq2iw9oiXYr4EFWpIEy0yCigTuSTiDF68+c8M9B+7bTwkRpz/rMPC4ViO5Q==
+"@react-native-community/cli-tools@^9.2.1":
+  version "9.2.1"
+  resolved "https://registry.yarnpkg.com/@react-native-community/cli-tools/-/cli-tools-9.2.1.tgz#c332324b1ea99f9efdc3643649bce968aa98191c"
+  integrity sha512-bHmL/wrKmBphz25eMtoJQgwwmeCylbPxqFJnFSbkqJPXQz3ManQ6q/gVVMqFyz7D3v+riaus/VXz3sEDa97uiQ==
   dependencies:
     appdirsjs "^1.2.4"
     chalk "^4.1.2"
     find-up "^5.0.0"
-    lodash "^4.17.15"
     mime "^2.4.1"
     node-fetch "^2.6.0"
     open "^6.2.0"
@@ -1576,36 +1648,33 @@
     semver "^6.3.0"
     shell-quote "^1.7.3"
 
-"@react-native-community/cli-types@^8.0.0":
-  version "8.0.0"
-  resolved "https://registry.yarnpkg.com/@react-native-community/cli-types/-/cli-types-8.0.0.tgz#72d24178e5ed1c2d271da43e0a4a4f59178f261a"
-  integrity sha512-1lZS1PEvMlFaN3Se1ksyoFWzMjk+YfKi490GgsqKJln9gvFm8tqVPdnXttI5Uf2DQf3BMse8Bk8dNH4oV6Ewow==
+"@react-native-community/cli-types@^9.1.0":
+  version "9.1.0"
+  resolved "https://registry.yarnpkg.com/@react-native-community/cli-types/-/cli-types-9.1.0.tgz#dcd6a0022f62790fe1f67417f4690db938746aab"
+  integrity sha512-KDybF9XHvafLEILsbiKwz5Iobd+gxRaPyn4zSaAerBxedug4er5VUWa8Szy+2GeYKZzMh/gsb1o9lCToUwdT/g==
   dependencies:
     joi "^17.2.1"
 
-"@react-native-community/cli@^8.0.4":
-  version "8.0.6"
-  resolved "https://registry.yarnpkg.com/@react-native-community/cli/-/cli-8.0.6.tgz#7aae37843ab8e44b75c477c1de69f4c902e599ef"
-  integrity sha512-E36hU/if3quQCfJHGWVkpsCnwtByRCwORuAX0r6yr1ebKktpKeEO49zY9PAu/Z1gfyxCtgluXY0HfRxjKRFXTg==
-  dependencies:
-    "@react-native-community/cli-clean" "^8.0.4"
-    "@react-native-community/cli-config" "^8.0.6"
-    "@react-native-community/cli-debugger-ui" "^8.0.0"
-    "@react-native-community/cli-doctor" "^8.0.6"
-    "@react-native-community/cli-hermes" "^8.0.5"
-    "@react-native-community/cli-plugin-metro" "^8.0.4"
-    "@react-native-community/cli-server-api" "^8.0.4"
-    "@react-native-community/cli-tools" "^8.0.4"
-    "@react-native-community/cli-types" "^8.0.0"
+"@react-native-community/cli@9.3.5":
+  version "9.3.5"
+  resolved "https://registry.yarnpkg.com/@react-native-community/cli/-/cli-9.3.5.tgz#73626d3be8f5e2e6389f2555d126666fb8de4389"
+  integrity sha512-X+/xSysHsb0rXUWZKtXnKGhUNMRPxYzyhBc3VMld+ygPaFG57TAdK9rFGRu7NkIsRI6qffF/SukQPVlBZIfBHg==
+  dependencies:
+    "@react-native-community/cli-clean" "^9.2.1"
+    "@react-native-community/cli-config" "^9.2.1"
+    "@react-native-community/cli-debugger-ui" "^9.0.0"
+    "@react-native-community/cli-doctor" "^9.3.0"
+    "@react-native-community/cli-hermes" "^9.3.4"
+    "@react-native-community/cli-plugin-metro" "^9.3.3"
+    "@react-native-community/cli-server-api" "^9.2.1"
+    "@react-native-community/cli-tools" "^9.2.1"
+    "@react-native-community/cli-types" "^9.1.0"
     chalk "^4.1.2"
-    commander "^2.19.0"
+    commander "^9.4.0"
     execa "^1.0.0"
     find-up "^4.1.0"
     fs-extra "^8.1.0"
     graceful-fs "^4.1.3"
-    leven "^3.1.0"
-    lodash "^4.17.15"
-    minimist "^1.2.0"
     prompts "^2.4.0"
     semver "^6.3.0"
 
@@ -1619,6 +1688,11 @@
   resolved "https://registry.yarnpkg.com/@react-native/normalize-color/-/normalize-color-2.0.0.tgz#da955909432474a9a0fe1cbffc66576a0447f567"
   integrity sha512-Wip/xsc5lw8vsBlmY2MO/gFLp3MvuZ2baBZjDeTjjndMgM0h5sxz7AZR62RDPGgstp8Np7JzjvVqVT7tpFZqsw==
 
+"@react-native/normalize-color@^2.0.0":
+  version "2.1.0"
+  resolved "https://registry.yarnpkg.com/@react-native/normalize-color/-/normalize-color-2.1.0.tgz#939b87a9849e81687d3640c5efa2a486ac266f91"
+  integrity sha512-Z1jQI2NpdFJCVgpY+8Dq/Bt3d+YUi1928Q+/CZm/oh66fzM0RUl54vvuXlPJKybH4pdCZey1eDTPaLHkMPNgWA==
+
 "@react-native/polyfills@2.0.0":
   version "2.0.0"
   resolved "https://registry.yarnpkg.com/@react-native/polyfills/-/polyfills-2.0.0.tgz#4c40b74655c83982c8cf47530ee7dc13d957b6aa"
@@ -1693,13 +1767,6 @@
   dependencies:
     "@babel/types" "^7.3.0"
 
-"@types/graceful-fs@^4.1.2":
-  version "4.1.5"
-  resolved "https://registry.yarnpkg.com/@types/graceful-fs/-/graceful-fs-4.1.5.tgz#21ffba0d98da4350db64891f92a9e5db3cdb4e15"
-  integrity sha512-anKkLmZZ+xm4p8JWBf4hElkM4XR+EZeA2M9BAkkTldmcyDY4mbdIJnRghDJH3Ov5ooY7/UAoENtmdMSkaAd7Cw==
-  dependencies:
-    "@types/node" "*"
-
 "@types/graceful-fs@^4.1.3":
   version "4.1.6"
   resolved "https://registry.yarnpkg.com/@types/graceful-fs/-/graceful-fs-4.1.6.tgz#e14b2576a1c25026b7f02ede1de3b84c3a1efeae"
@@ -1767,6 +1834,16 @@
   dependencies:
     "@types/yargs-parser" "*"
 
+"@xmldom/xmldom@^0.8.8":
+  version "0.8.10"
+  resolved "https://registry.yarnpkg.com/@xmldom/xmldom/-/xmldom-0.8.10.tgz#a1337ca426aa61cef9fe15b5b28e340a72f6fa99"
+  integrity sha512-2WALfTl4xo2SkGCYRt6rDTFfk9R1czmBvUQy12gK2KuRKIpWEhcbbzy8EZXtz/jkRqHX8bFEc6FC1HjX4TUWYw==
+
+"@xmldom/xmldom@~0.7.7":
+  version "0.7.13"
+  resolved "https://registry.yarnpkg.com/@xmldom/xmldom/-/xmldom-0.7.13.tgz#ff34942667a4e19a9f4a0996a76814daac364cf3"
+  integrity sha512-lm2GW5PkosIzccsaZIz7tp8cPADSIlIHWDFTR1N0SzfinhhYgeIQjFMz4rYzanCScr3DqQLeomUDArp6MWKm+g==
+
 abort-controller@^3.0.0:
   version "3.0.0"
   resolved "https://registry.yarnpkg.com/abort-controller/-/abort-controller-3.0.0.tgz#eaf54d53b62bae4138e809ca225c8439a6efb392"
@@ -1867,40 +1944,15 @@ argparse@^1.0.7:
   dependencies:
     sprintf-js "~1.0.2"
 
-arr-diff@^4.0.0:
-  version "4.0.0"
-  resolved
"https://registry.yarnpkg.com/arr-diff/-/arr-diff-4.0.0.tgz#d6461074febfec71e7e15235761a329a5dc7c520" - integrity sha512-YVIQ82gZPGBebQV/a8dar4AitzCQs0jjXwMPZllpXMaGjXPYVUawSxQrRsjhjupyVxEvbHgUmIhKVlND+j02kA== - -arr-flatten@^1.1.0: - version "1.1.0" - resolved "https://registry.yarnpkg.com/arr-flatten/-/arr-flatten-1.1.0.tgz#36048bbff4e7b47e136644316c99669ea5ae91f1" - integrity sha512-L3hKV5R/p5o81R7O02IGnwpDmkp6E982XhtbuwSe3O4qOtMMMtodicASA1Cny2U+aCXcNpml+m4dPsvsJ3jatg== - -arr-union@^3.1.0: - version "3.1.0" - resolved "https://registry.yarnpkg.com/arr-union/-/arr-union-3.1.0.tgz#e39b09aea9def866a8f206e288af63919bae39c4" - integrity sha512-sKpyeERZ02v1FeCZT8lrfJq5u6goHCtpTAzPwJYe7c8SPFOboNjNg1vz2L4VTn9T4PQxEx13TbXLmYUcS6Ug7Q== - -array-unique@^0.3.2: - version "0.3.2" - resolved "https://registry.yarnpkg.com/array-unique/-/array-unique-0.3.2.tgz#a894b75d4bc4f6cd679ef3244a9fd8f46ae2d428" - integrity sha512-SleRWjh9JUud2wH1hPs9rZBZ33H6T9HOiL0uwGnGx9FpE6wKGyfWugmbkEOIs6qWrZhg0LWeLziLrEwQJhs5mQ== - asap@~2.0.6: version "2.0.6" resolved "https://registry.yarnpkg.com/asap/-/asap-2.0.6.tgz#e50347611d7e690943208bbdafebcbc2fb866d46" integrity sha512-BSHWgDSAiKs50o2Re8ppvp3seVHXSRM44cdSsT9FfNEUUZLOGWVCsiWaRPWM1Znn+mqZ1OfVZ3z3DWEzSp7hRA== -assign-symbols@^1.0.0: - version "1.0.0" - resolved "https://registry.yarnpkg.com/assign-symbols/-/assign-symbols-1.0.0.tgz#59667f41fadd4f20ccbc2bb96b8d4f7f78ec0367" - integrity sha512-Q+JC7Whu8HhmTdBph/Tq59IoRtoy6KAm5zzPv00WdujX82lbAL8K7WVjne7vdCsAmbF4AYaDOPyO3k0kl8qIrw== - -ast-types@0.14.2: - version "0.14.2" - resolved "https://registry.yarnpkg.com/ast-types/-/ast-types-0.14.2.tgz#600b882df8583e3cd4f2df5fa20fa83759d4bdfd" - integrity sha512-O0yuUDnZeQDL+ncNGlJ78BiO4jnYI3bvMsD5prT0/nsgijG/LpNBIr63gTjVTNsiGkgQhiyCShTgxt8oXOrklA== +ast-types@0.15.2: + version "0.15.2" + resolved "https://registry.yarnpkg.com/ast-types/-/ast-types-0.15.2.tgz#39ae4809393c4b16df751ee563411423e85fb49d" + integrity sha512-c27loCv9QkZinsa5ProX751khO9DJl/AcB5c2KNtA6NRvHKS0PgLfcftz72KVq504vB0Gku5s2kUZzDBvQWvHg== dependencies: tslib "^2.0.1" @@ -1919,11 +1971,6 @@ async@^3.2.2: resolved "https://registry.yarnpkg.com/async/-/async-3.2.4.tgz#2d22e00f8cddeb5fde5dd33522b56d1cf569a81c" integrity sha512-iAB+JbDEGXhyIUavoDl9WP/Jj106Kz9DEn1DPgYw5ruDn0e3Wgi3sKFm55sASdGBNOQB8F59d9qQ7deqrHA8wQ== -atob@^2.1.2: - version "2.1.2" - resolved "https://registry.yarnpkg.com/atob/-/atob-2.1.2.tgz#6d9517eb9e030d2436666651e86bd9f6f13533c9" - integrity sha512-Wm6ukoaOGJi/73p/cl2GvLjTI5JM1k/O14isD73YML8StrH/7/lRFgmg8nICZgD3bZZvjwCGxtMOD3wWNAu8cg== - babel-core@^7.0.0-bridge.0: version "7.0.0-bridge.0" resolved "https://registry.yarnpkg.com/babel-core/-/babel-core-7.0.0-bridge.0.tgz#95a492ddd90f9b4e9a4a1da14eb335b87b634ece" @@ -2079,23 +2126,15 @@ base-64@^0.1.0: resolved "https://registry.yarnpkg.com/base-64/-/base-64-0.1.0.tgz#780a99c84e7d600260361511c4877613bf24f6bb" integrity sha512-Y5gU45svrR5tI2Vt/X9GPd3L0HNIKzGu202EjxrXMpuc2V2CiKgemAbUUsqYmZJvPtCXoUKjNZwBJzsNScUbXA== -base64-js@^1.1.2, base64-js@^1.3.1, base64-js@^1.5.1: +base64-js@^1.1.2, base64-js@^1.2.3, base64-js@^1.3.1, base64-js@^1.5.1: version "1.5.1" resolved "https://registry.yarnpkg.com/base64-js/-/base64-js-1.5.1.tgz#1b1b440160a5bf7ad40b650f095963481903930a" integrity sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA== -base@^0.11.1: - version "0.11.2" - resolved "https://registry.yarnpkg.com/base/-/base-0.11.2.tgz#7bde5ced145b6d551a90db87f83c558b4eb48a8f" - integrity 
sha512-5T6P4xPgpp0YDFvSWwEZ4NoE3aM4QBQXDzmVbraCkFj8zHM+mba8SyqB5DbZWyR7mYHo6Y7BdQo3MoA4m0TeQg== - dependencies: - cache-base "^1.0.1" - class-utils "^0.3.5" - component-emitter "^1.2.1" - define-property "^1.0.0" - isobject "^3.0.1" - mixin-deep "^1.2.0" - pascalcase "^0.1.1" +big-integer@1.6.x: + version "1.6.52" + resolved "https://registry.yarnpkg.com/big-integer/-/big-integer-1.6.52.tgz#60a887f3047614a8e1bffe5d7173490a97dc8c85" + integrity sha512-QxD8cf2eVqJOOz63z6JIN9BzvVs/dlySa5HGSBH5xtR8dPteIRQnBxxKqkNTiT6jbDTF6jAfrd4oMcND9RGbQg== bl@^4.1.0: version "4.1.0" @@ -2111,6 +2150,20 @@ bluebird@^3.5.4: resolved "https://registry.yarnpkg.com/bluebird/-/bluebird-3.7.2.tgz#9f229c15be272454ffa973ace0dbee79a1b0c36f" integrity sha512-XpNj6GDQzdfW+r2Wnn7xiSAd7TM3jzkxGXBGTtWKuSXv1xUV+azxAm8jdWZN06QTQk+2N2XB9jRDkvbmQmcRtg== +bplist-creator@0.1.0: + version "0.1.0" + resolved "https://registry.yarnpkg.com/bplist-creator/-/bplist-creator-0.1.0.tgz#018a2d1b587f769e379ef5519103730f8963ba1e" + integrity sha512-sXaHZicyEEmY86WyueLTQesbeoH/mquvarJaQNbjuOQO+7gbFcDEWqKmcWA4cOTLzFlfgvkiVxolk1k5bBIpmg== + dependencies: + stream-buffers "2.2.x" + +bplist-parser@0.3.1: + version "0.3.1" + resolved "https://registry.yarnpkg.com/bplist-parser/-/bplist-parser-0.3.1.tgz#e1c90b2ca2a9f9474cc72f6862bbf3fee8341fd1" + integrity sha512-PyJxiNtA5T2PlLIeBot4lbp7rj4OadzjnMZD/G5zuBNt8ei/yCU7+wW0h2bag9vr8c+/WuRWmSxbqAl9hL1rBA== + dependencies: + big-integer "1.6.x" + brace-expansion@^1.1.7: version "1.1.11" resolved "https://registry.yarnpkg.com/brace-expansion/-/brace-expansion-1.1.11.tgz#3c7fcbf529d87226f3d2f52b966ff5271eb441dd" @@ -2126,22 +2179,6 @@ brace-expansion@^2.0.1: dependencies: balanced-match "^1.0.0" -braces@^2.3.1: - version "2.3.2" - resolved "https://registry.yarnpkg.com/braces/-/braces-2.3.2.tgz#5979fd3f14cd531565e5fa2df1abfff1dfaee729" - integrity sha512-aNdbnj9P8PjdXU4ybaWLK2IF3jc/EoDYbC7AazW6to3TRsfXxscC9UXOB5iDiEQrkyIbWp2SLQda4+QAa7nc3w== - dependencies: - arr-flatten "^1.1.0" - array-unique "^0.3.2" - extend-shallow "^2.0.1" - fill-range "^4.0.0" - isobject "^3.0.1" - repeat-element "^1.1.2" - snapdragon "^0.8.1" - snapdragon-node "^2.0.1" - split-string "^3.0.2" - to-regex "^3.0.1" - braces@^3.0.2: version "3.0.3" resolved "https://registry.yarnpkg.com/braces/-/braces-3.0.3.tgz#490332f40919452272d55a8480adc0c441358789" @@ -2195,6 +2232,14 @@ buffer@^5.5.0: base64-js "^1.3.1" ieee754 "^1.1.13" +buffer@^6.0.3: + version "6.0.3" + resolved "https://registry.yarnpkg.com/buffer/-/buffer-6.0.3.tgz#2ace578459cc8fbe2a70aaa8f52ee63b6a74c6c6" + integrity sha512-FTiCpNxtwiZZHEZbcbTIcZjERVICn9yq/pDFkTl95/AxzD1naBctN7YO68riM/gLSDY7sdrMby8hofADYuuqOA== + dependencies: + base64-js "^1.3.1" + ieee754 "^1.2.1" + bunyan-debug-stream@^3.1.0: version "3.1.0" resolved "https://registry.yarnpkg.com/bunyan-debug-stream/-/bunyan-debug-stream-3.1.0.tgz#78309c67ad85cfb8f011155334152c49209dcda8" @@ -2217,21 +2262,6 @@ bytes@3.0.0: resolved "https://registry.yarnpkg.com/bytes/-/bytes-3.0.0.tgz#d32815404d689699f85a4ea4fa8755dd13a96048" integrity sha512-pMhOfFDPiv9t5jjIXkHosWmkSyQbvsgEVNkz0ERHbuLh2T/7j4Mqqpz523Fe8MVY89KC6Sh/QfS2sM+SjgFDcw== -cache-base@^1.0.1: - version "1.0.1" - resolved "https://registry.yarnpkg.com/cache-base/-/cache-base-1.0.1.tgz#0a7f46416831c8b662ee36fe4e7c59d76f666ab2" - integrity sha512-AKcdTnFSWATd5/GCPRxr2ChwIJ85CeyrEyjRHlKxQ56d4XJMGym0uAiKn0xbLOGOl3+yRpOTi484dVCEc5AUzQ== - dependencies: - collection-visit "^1.0.0" - component-emitter "^1.2.1" - get-value "^2.0.6" - has-value "^1.0.0" - isobject "^3.0.1" 
- set-value "^2.0.0" - to-object-path "^0.3.0" - union-value "^1.0.0" - unset-value "^1.0.0" - caf@^15.0.1: version "15.0.1" resolved "https://registry.yarnpkg.com/caf/-/caf-15.0.1.tgz#28f1f17bd93dc4b5d95207ad07066eddf4768160" @@ -2335,16 +2365,6 @@ cjs-module-lexer@^1.0.0: resolved "https://registry.yarnpkg.com/cjs-module-lexer/-/cjs-module-lexer-1.2.2.tgz#9f84ba3244a512f3a54e5277e8eef4c489864e40" integrity sha512-cOU9usZw8/dXIXKtwa8pM0OTJQuJkxMN6w30csNRUerHfeQ5R6U3kkU/FtJeIf3M202OHfY2U8ccInBG7/xogA== -class-utils@^0.3.5: - version "0.3.6" - resolved "https://registry.yarnpkg.com/class-utils/-/class-utils-0.3.6.tgz#f93369ae8b9a7ce02fd41faad0ca83033190c463" - integrity sha512-qOhPa/Fj7s6TY8H8esGu5QNpMMQxz79h+urzrNYN6mn+9BnxlDGf5QZ+XeCDsxSjPqsSR56XOZOJmpeurnLMeg== - dependencies: - arr-union "^3.1.0" - define-property "^0.2.5" - isobject "^3.0.0" - static-extend "^0.1.1" - cli-cursor@^3.1.0: version "3.1.0" resolved "https://registry.yarnpkg.com/cli-cursor/-/cli-cursor-3.1.0.tgz#264305a7ae490d1d03bf0c9ba7c925d1753af307" @@ -2408,14 +2428,6 @@ collect-v8-coverage@^1.0.0: resolved "https://registry.yarnpkg.com/collect-v8-coverage/-/collect-v8-coverage-1.0.1.tgz#cc2c8e94fc18bbdffe64d6534570c8a673b27f59" integrity sha512-iBPtljfCNcTKNAto0KEtDfZ3qzjJvqE3aTGZsbhjSBlorqpXJlaWWtPO35D+ZImoC3KWejX64o+yPGxhWSTzfg== -collection-visit@^1.0.0: - version "1.0.0" - resolved "https://registry.yarnpkg.com/collection-visit/-/collection-visit-1.0.0.tgz#4bc0373c164bc3291b4d368c829cf1a80a59dca0" - integrity sha512-lNkKvzEeMBBjUGHZ+q6z9pSJla0KWAQPvtzhEV9+iGyQYG+pBpl7xKDhxoNSOZH2hhv0v5k0y2yAM4o4SjoSkw== - dependencies: - map-visit "^1.0.0" - object-visit "^1.0.0" - color-convert@^1.9.0: version "1.9.3" resolved "https://registry.yarnpkg.com/color-convert/-/color-convert-1.9.3.tgz#bb71850690e1f136567de629d2d5471deda4c1e8" @@ -2450,10 +2462,10 @@ command-exists@^1.2.8: resolved "https://registry.yarnpkg.com/command-exists/-/command-exists-1.2.9.tgz#c50725af3808c8ab0260fd60b01fbfa25b954f69" integrity sha512-LTQ/SGc+s0Xc0Fu5WaKnR0YiygZkm9eKFvyS+fRsU7/ZWFF8ykFM6Pc9aCVf1+xasOOZpO3BAVgVrKvsqKHV7w== -commander@^2.19.0: - version "2.20.3" - resolved "https://registry.yarnpkg.com/commander/-/commander-2.20.3.tgz#fd485e84c03eb4881c20722ba48035e8531aeb33" - integrity sha512-GpVkmM8vF2vQUkj2LvZmD35JxeJOLCwJ9cUkugyk2nuhbv3+mJvpLYYt+0+USMxE+oj+ey/lJEnhZw75x/OMcQ== +commander@^9.4.0: + version "9.5.0" + resolved "https://registry.yarnpkg.com/commander/-/commander-9.5.0.tgz#bc08d1eb5cedf7ccb797a96199d41c7bc3e60d30" + integrity sha512-KRs7WVDKg86PWiuAqhDrAQnTXZKraVcCc6vFdL14qrZ/DcWwuRo7VoiYXalXO7S5GKpqYiVEwCbgFDfxNHKJBQ== commander@~2.13.0: version "2.13.0" @@ -2465,11 +2477,6 @@ commondir@^1.0.1: resolved "https://registry.yarnpkg.com/commondir/-/commondir-1.0.1.tgz#ddd800da0c66127393cca5950ea968a3aaf1253b" integrity sha512-W9pAhw0ja1Edb5GVdIF1mjZw/ASI0AlShXM83UUGe2DVr5TdAPEA1OA8m/g8zWp9x6On7gqufY+FatDbC3MDQg== -component-emitter@^1.2.1: - version "1.3.0" - resolved "https://registry.yarnpkg.com/component-emitter/-/component-emitter-1.3.0.tgz#16e4070fba8ae29b679f2215853ee181ab2eabc0" - integrity sha512-Rd3se6QB+sO1TwqZjscQrurpEPIfO0/yYnSin6Q/rD3mOutHvUrCAhJub3r90uNb+SESBuE0QYoB90YdfatsRg== - compressible@~2.0.16: version "2.0.18" resolved "https://registry.yarnpkg.com/compressible/-/compressible-2.0.18.tgz#af53cca6b070d4c3c0750fbd77286a6d7cc46fba" @@ -2522,11 +2529,6 @@ convert-source-map@^2.0.0: resolved "https://registry.yarnpkg.com/convert-source-map/-/convert-source-map-2.0.0.tgz#4b560f649fc4e918dd0ab75cf4961e8bc882d82a" 
integrity sha512-Kvp459HrV2FEJ1CAsi1Ku+MY3kasH19TFykTz2xWmMeq6bk2NU3XXvfJ+Q61m0xktWwt+1HSYf3JZsTms3aRJg== -copy-descriptor@^0.1.0: - version "0.1.1" - resolved "https://registry.yarnpkg.com/copy-descriptor/-/copy-descriptor-0.1.1.tgz#676f6eb3c39997c2ee1ac3a924fd6124748f578d" - integrity sha512-XgZ0pFcakEUlbwQEVNg3+QAis1FyTL3Qel9FYy8pSkQqoG3PNoT0bOCQtOXcOkur21r2Eq2kI+IE+gsmAEVlYw== - core-js-compat@^3.21.0: version "3.23.1" resolved "https://registry.yarnpkg.com/core-js-compat/-/core-js-compat-3.23.1.tgz#23d44d9f209086e60dabf9130cea7719af6e199b" @@ -2583,7 +2585,7 @@ dayjs@^1.8.15: resolved "https://registry.yarnpkg.com/dayjs/-/dayjs-1.11.3.tgz#4754eb694a624057b9ad2224b67b15d552589258" integrity sha512-xxwlswWOlGhzgQ4TKzASQkUhqERI3egRNqgV4ScR8wlANA/A9tZ7miXa44vTTKEq5l7vWoL5G57bG3zA+Kow0A== -debug@2.6.9, debug@^2.2.0, debug@^2.3.3: +debug@2.6.9, debug@^2.2.0: version "2.6.9" resolved "https://registry.yarnpkg.com/debug/-/debug-2.6.9.tgz#5d128515df134ff327e90a4c93f4e077a536341f" integrity sha512-bC7ElrdJaJnPbAP+1EotYvqZsb3ecl5wi6Bfi6BJTUcNowp6cvspg0jXznRTKDjm/E7AdgFBVeAPVMNcKGsHMA== @@ -2597,6 +2599,13 @@ debug@^4.1.0, debug@^4.1.1: dependencies: ms "2.1.2" +debug@^4.3.1, debug@^4.3.4: + version "4.4.0" + resolved "https://registry.yarnpkg.com/debug/-/debug-4.4.0.tgz#2b3f2aea2ffeb776477460267377dc8710faba8a" + integrity sha512-6WTZ/IxCY/T6BALoZHaE4ctp9xm+Z5kY/pzYaCHRFeyVhojxlrm+46y68HA6hr0TcwEssoxNiDEUJQjfPZ/RYA== + dependencies: + ms "^2.1.3" + decamelize@^1.2.0: version "1.2.0" resolved "https://registry.yarnpkg.com/decamelize/-/decamelize-1.2.0.tgz#f6534d15148269b20352e7bee26f501f9a191290" @@ -2607,11 +2616,6 @@ decamelize@^4.0.0: resolved "https://registry.yarnpkg.com/decamelize/-/decamelize-4.0.0.tgz#aa472d7bf660eb15f3494efd531cab7f2a709837" integrity sha512-9iE1PgSik9HeIIw2JO94IidnE3eBoQrFJ3w7sFuzSX4DpmZ3v5sZpUiV5Swcf6mQEF+Y0ru8Neo+p+nyh2J+hQ== -decode-uri-component@^0.2.0: - version "0.2.2" - resolved "https://registry.yarnpkg.com/decode-uri-component/-/decode-uri-component-0.2.2.tgz#e69dbe25d37941171dd540e024c444cd5188e1e9" - integrity sha512-FqUYQ+8o158GyGTrMFJms9qh3CqTKvAqgqsTnkLI8sKu0028orqBhxNMFkFen0zGyg6epACD32pjVk58ngIErQ== - dedent@^0.7.0: version "0.7.0" resolved "https://registry.yarnpkg.com/dedent/-/dedent-0.7.0.tgz#2495ddbaf6eb874abb0e1be9df22d2e5a544326c" @@ -2642,28 +2646,6 @@ define-properties@^1.1.3: has-property-descriptors "^1.0.0" object-keys "^1.1.1" -define-property@^0.2.5: - version "0.2.5" - resolved "https://registry.yarnpkg.com/define-property/-/define-property-0.2.5.tgz#c35b1ef918ec3c990f9a5bc57be04aacec5c8116" - integrity sha512-Rr7ADjQZenceVOAKop6ALkkRAmH1A4Gx9hV/7ZujPUN2rkATqFO0JZLZInbAjpZYoJ1gUx8MRMQVkYemcbMSTA== - dependencies: - is-descriptor "^0.1.0" - -define-property@^1.0.0: - version "1.0.0" - resolved "https://registry.yarnpkg.com/define-property/-/define-property-1.0.0.tgz#769ebaaf3f4a63aad3af9e8d304c9bbe79bfb0e6" - integrity sha512-cZTYKFWspt9jZsMscWo8sc/5lbPC9Q0N5nBLgb+Yd915iL3udB1uFgS3B8YCx66UVHq018DAVFoee7x+gxggeA== - dependencies: - is-descriptor "^1.0.0" - -define-property@^2.0.2: - version "2.0.2" - resolved "https://registry.yarnpkg.com/define-property/-/define-property-2.0.2.tgz#d459689e8d654ba77e02a817f8710d702cb16e9d" - integrity sha512-jwK2UV4cnPpbcG7+VRARKTZPUWowwXA8bzH5NP6ud0oeAxyYPuGZUAC7hMugpCdz4BeSZl2Dl9k66CHJ/46ZYQ== - dependencies: - is-descriptor "^1.0.2" - isobject "^3.0.1" - denodeify@^1.2.1: version "1.2.1" resolved "https://registry.yarnpkg.com/denodeify/-/denodeify-1.2.1.tgz#3a36287f5034e699e7577901052c2e6c94251631" @@ 
-2885,19 +2867,6 @@ exit@^0.1.2: resolved "https://registry.yarnpkg.com/exit/-/exit-0.1.2.tgz#0632638f8d877cc82107d30a0fff1a17cba1cd0c" integrity sha512-Zk/eNKV2zbjpKzrsQ+n1G6poVbErQxJ0LBOJXaKZ1EViLzH+hrLu9cdXI4zw9dBQJslwBEpbQ2P1oS7nDxs6jQ== -expand-brackets@^2.1.4: - version "2.1.4" - resolved "https://registry.yarnpkg.com/expand-brackets/-/expand-brackets-2.1.4.tgz#b77735e315ce30f6b6eff0f83b04151a22449622" - integrity sha512-w/ozOKR9Obk3qoWeY/WDi6MFta9AoMR+zud60mdnbniMcBxRuFJyDt2LdX/14A1UABeqk+Uk+LDfUpvoGKppZA== - dependencies: - debug "^2.3.3" - define-property "^0.2.5" - extend-shallow "^2.0.1" - posix-character-classes "^0.1.0" - regex-not "^1.0.0" - snapdragon "^0.8.1" - to-regex "^3.0.1" - expect@^29.5.0: version "29.5.0" resolved "https://registry.yarnpkg.com/expect/-/expect-29.5.0.tgz#68c0509156cb2a0adb8865d413b137eeaae682f7" @@ -2909,35 +2878,6 @@ expect@^29.5.0: jest-message-util "^29.5.0" jest-util "^29.5.0" -extend-shallow@^2.0.1: - version "2.0.1" - resolved "https://registry.yarnpkg.com/extend-shallow/-/extend-shallow-2.0.1.tgz#51af7d614ad9a9f610ea1bafbb989d6b1c56890f" - integrity sha512-zCnTtlxNoAiDc3gqY2aYAWFx7XWWiasuF2K8Me5WbN8otHKTUKBwjPtNpRs/rbUZm7KxWAaNj7P1a/p52GbVug== - dependencies: - is-extendable "^0.1.0" - -extend-shallow@^3.0.0, extend-shallow@^3.0.2: - version "3.0.2" - resolved "https://registry.yarnpkg.com/extend-shallow/-/extend-shallow-3.0.2.tgz#26a71aaf073b39fb2127172746131c2704028db8" - integrity sha512-BwY5b5Ql4+qZoefgMj2NUmx+tehVTH/Kf4k1ZEtOHNFcm2wSxMRo992l6X3TIgni2eZVTZ85xMOjF31fwZAj6Q== - dependencies: - assign-symbols "^1.0.0" - is-extendable "^1.0.1" - -extglob@^2.0.4: - version "2.0.4" - resolved "https://registry.yarnpkg.com/extglob/-/extglob-2.0.4.tgz#ad00fe4dc612a9232e8718711dc5cb5ab0285543" - integrity sha512-Nmb6QXkELsuBr24CJSkilo6UHHgbekK5UiZgfE6UHD3Eb27YC6oD+bhcT+tJ6cl8dmsgdQxnWlcry8ksBIBLpw== - dependencies: - array-unique "^0.3.2" - define-property "^1.0.0" - expand-brackets "^2.1.4" - extend-shallow "^2.0.1" - fragment-cache "^0.2.1" - regex-not "^1.0.0" - snapdragon "^0.8.1" - to-regex "^3.0.1" - fast-deep-equal@^3.1.1: version "3.1.3" resolved "https://registry.yarnpkg.com/fast-deep-equal/-/fast-deep-equal-3.1.3.tgz#3a7d56b559d6cbc3eb512325244e619a65c6c525" @@ -2955,16 +2895,6 @@ fb-watchman@^2.0.0: dependencies: bser "2.1.1" -fill-range@^4.0.0: - version "4.0.0" - resolved "https://registry.yarnpkg.com/fill-range/-/fill-range-4.0.0.tgz#d544811d428f98eb06a63dc402d2403c328c38f7" - integrity sha512-VcpLTWqWDiTerugjj8e3+esbg+skS3M9e54UuR3iCeIDMXCLTsAH8hTSzDQU/X6/6t3eYkOKoZSef2PlU6U1XQ== - dependencies: - extend-shallow "^2.0.1" - is-number "^3.0.0" - repeat-string "^1.6.1" - to-regex-range "^2.1.0" - fill-range@^7.1.1: version "7.1.1" resolved "https://registry.yarnpkg.com/fill-range/-/fill-range-7.1.1.tgz#44265d3cac07e3ea7dc247516380643754a05292" @@ -3017,7 +2947,7 @@ find-up@^4.0.0, find-up@^4.1.0: locate-path "^5.0.0" path-exists "^4.0.0" -find-up@^5.0.0: +find-up@^5.0.0, find-up@~5.0.0: version "5.0.0" resolved "https://registry.yarnpkg.com/find-up/-/find-up-5.0.0.tgz#4c92819ecb7083561e4f4a240a86be5198f536fc" integrity sha512-78/PXT1wlLLDgTzDs7sjq9hzz0vXD+zn+7wypEe4fXQxCmdmqfGsEPQxmiCSQI3ajFV91bVSsvNtrJRiW6nGng== @@ -3040,18 +2970,6 @@ flow-parser@^0.121.0: resolved "https://registry.yarnpkg.com/flow-parser/-/flow-parser-0.121.0.tgz#9f9898eaec91a9f7c323e9e992d81ab5c58e618f" integrity sha512-1gIBiWJNR0tKUNv8gZuk7l9rVX06OuLzY9AoGio7y/JT4V1IZErEMEq2TJS+PFcw/y0RshZ1J/27VfK1UQzYVg== -for-in@^1.0.2: - version "1.0.2" - resolved 
"https://registry.yarnpkg.com/for-in/-/for-in-1.0.2.tgz#81068d295a8142ec0ac726c6e2200c30fb6d5e80" - integrity sha512-7EwmXrOjyL+ChxMhmG5lnW9MPt1aIeZEwKhQzoBUdTV0N3zuwWDZYVJatDvZ2OyzPUvdIAZDsCetk3coyMfcnQ== - -fragment-cache@^0.2.1: - version "0.2.1" - resolved "https://registry.yarnpkg.com/fragment-cache/-/fragment-cache-0.2.1.tgz#4290fad27f13e89be7f33799c6bc5a0abfff0d19" - integrity sha512-GMBAbW9antB8iZRHLoGw0b3HANt57diZYFO/HL1JGIC1MjKrdmhxvrJbupnVvpys0zsz7yBApXdQyfepKly2kA== - dependencies: - map-cache "^0.2.2" - fresh@0.5.2: version "0.5.2" resolved "https://registry.yarnpkg.com/fresh/-/fresh-0.5.2.tgz#3d8cadd90d976569fa835ab1f8e4b23a105605a7" @@ -3089,6 +3007,11 @@ fs.realpath@^1.0.0: resolved "https://registry.yarnpkg.com/fs.realpath/-/fs.realpath-1.0.0.tgz#1504ad2523158caa40db4a2787cb01411994ea4f" integrity sha512-OO0pH2lK6a0hZnAdau5ItzHPI6pUlvI7jMVnxUQRtw4owF2wk8lOSabtGDCTP4Ggrg2MbGnWO9X8K1t4+fGMDw== +fsevents@^2.1.2: + version "2.3.3" + resolved "https://registry.yarnpkg.com/fsevents/-/fsevents-2.3.3.tgz#cac6407785d03675a2a5e1a5305c697b347d90d6" + integrity sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw== + fsevents@^2.3.2: version "2.3.2" resolved "https://registry.yarnpkg.com/fsevents/-/fsevents-2.3.2.tgz#8a526f78b8fdf4623b709e0b975c52c24c02fd1a" @@ -3140,10 +3063,22 @@ get-stream@^6.0.0: resolved "https://registry.yarnpkg.com/get-stream/-/get-stream-6.0.1.tgz#a262d8eef67aced57c2852ad6167526a43cbf7b7" integrity sha512-ts6Wi+2j3jQjqi70w5AlN8DFnkSwC+MqmxEzdEALB2qXZYV3X/b1CTfgPLGJNMeAWxdPfU8FO1ms3NUfaHCPYg== -get-value@^2.0.3, get-value@^2.0.6: - version "2.0.6" - resolved "https://registry.yarnpkg.com/get-value/-/get-value-2.0.6.tgz#dc15ca1c672387ca76bd37ac0a395ba2042a2c28" - integrity sha512-Ln0UQDlxH1BapMu3GPtf7CuYNwRZf2gwCuPqbyG6pB8WfmFpzqcy4xtAaAMUhnNqjMKTiCPZG2oMT3YSx8U2NA== +getenv@^1.0.0: + version "1.0.0" + resolved "https://registry.yarnpkg.com/getenv/-/getenv-1.0.0.tgz#874f2e7544fbca53c7a4738f37de8605c3fcfc31" + integrity sha512-7yetJWqbS9sbn0vIfliPsFgoXMKn/YMF+Wuiog97x+urnSRRRZ7xB+uVkwGKzRgq9CDFfMQnE9ruL5DHv9c6Xg== + +glob@7.1.6: + version "7.1.6" + resolved "https://registry.yarnpkg.com/glob/-/glob-7.1.6.tgz#141f33b81a7c2492e125594307480c46679278a6" + integrity sha512-LwaxwyZ72Lk7vZINtNNrywX0ZuLyStrdDtabefZKAY5ZGJhVtgdznluResxNmPitE0SAO+O26sWTHeKSI2wMBA== + dependencies: + fs.realpath "^1.0.0" + inflight "^1.0.4" + inherits "2" + minimatch "^3.0.4" + once "^1.3.0" + path-is-absolute "^1.0.0" glob@^6.0.1: version "6.0.4" @@ -3211,37 +3146,6 @@ has-symbols@^1.0.1, has-symbols@^1.0.3: resolved "https://registry.yarnpkg.com/has-symbols/-/has-symbols-1.0.3.tgz#bb7b2c4349251dce87b125f7bdf874aa7c8b39f8" integrity sha512-l3LCuF6MgDNwTDKkdYGEihYjt5pRPbEg46rtlmnSPlUbgmB8LOIrKJbYYFBSbnPaJexMKtiPO8hmeRjRz2Td+A== -has-value@^0.3.1: - version "0.3.1" - resolved "https://registry.yarnpkg.com/has-value/-/has-value-0.3.1.tgz#7b1f58bada62ca827ec0a2078025654845995e1f" - integrity sha512-gpG936j8/MzaeID5Yif+577c17TxaDmhuyVgSwtnL/q8UUTySg8Mecb+8Cf1otgLoD7DDH75axp86ER7LFsf3Q== - dependencies: - get-value "^2.0.3" - has-values "^0.1.4" - isobject "^2.0.0" - -has-value@^1.0.0: - version "1.0.0" - resolved "https://registry.yarnpkg.com/has-value/-/has-value-1.0.0.tgz#18b281da585b1c5c51def24c930ed29a0be6b177" - integrity sha512-IBXk4GTsLYdQ7Rvt+GRBrFSVEkmuOUy4re0Xjd9kJSUQpnTrWR4/y9RpfexN9vkAPMFuQoeWKwqzPozRTlasGw== - dependencies: - get-value "^2.0.6" - has-values "^1.0.0" - isobject "^3.0.0" - -has-values@^0.1.4: - version "0.1.4" - resolved 
"https://registry.yarnpkg.com/has-values/-/has-values-0.1.4.tgz#6d61de95d91dfca9b9a02089ad384bff8f62b771" - integrity sha512-J8S0cEdWuQbqD9//tlZxiMuMNmxB8PlEwvYwuxsTmR1G5RXUePEX/SJn7aD0GMLieuZYSwNH0cQuJGwnYunXRQ== - -has-values@^1.0.0: - version "1.0.0" - resolved "https://registry.yarnpkg.com/has-values/-/has-values-1.0.0.tgz#95b0b63fec2146619a6fe57fe75628d5a39efe4f" - integrity sha512-ODYZC64uqzmtfGMEAX/FvZiRyWLpAC3vYnNunURUnkGVTS+mI0smVsWaPydRBsE3g+ok7h960jChO8mFcWlHaQ== - dependencies: - is-number "^3.0.0" - kind-of "^4.0.0" - has@^1.0.3: version "1.0.3" resolved "https://registry.yarnpkg.com/has/-/has-1.0.3.tgz#722d7cbfc1f6aa8241f16dd814e011e1f41e8796" @@ -3249,22 +3153,17 @@ has@^1.0.3: dependencies: function-bind "^1.1.1" -hermes-engine@~0.11.0: - version "0.11.0" - resolved "https://registry.yarnpkg.com/hermes-engine/-/hermes-engine-0.11.0.tgz#bb224730d230a02a5af02c4e090d1f52d57dd3db" - integrity sha512-7aMUlZja2IyLYAcZ69NBnwJAR5ZOYlSllj0oMpx08a8HzxHOys0eKCzfphrf6D0vX1JGO1QQvVsQKe6TkYherw== - -hermes-estree@0.6.0: - version "0.6.0" - resolved "https://registry.yarnpkg.com/hermes-estree/-/hermes-estree-0.6.0.tgz#e866fddae1b80aec65fe2ae450a5f2070ad54033" - integrity sha512-2YTGzJCkhdmT6VuNprWjXnvTvw/3iPNw804oc7yknvQpNKo+vJGZmtvLLCghOZf0OwzKaNAzeIMp71zQbNl09w== +hermes-estree@0.8.0: + version "0.8.0" + resolved "https://registry.yarnpkg.com/hermes-estree/-/hermes-estree-0.8.0.tgz#530be27243ca49f008381c1f3e8b18fb26bf9ec0" + integrity sha512-W6JDAOLZ5pMPMjEiQGLCXSSV7pIBEgRR5zGkxgmzGSXHOxqV5dC/M1Zevqpbm9TZDE5tu358qZf8Vkzmsc+u7Q== -hermes-parser@0.6.0: - version "0.6.0" - resolved "https://registry.yarnpkg.com/hermes-parser/-/hermes-parser-0.6.0.tgz#00d14e91bca830b3c1457050fa4187400cb96328" - integrity sha512-Vf58jBZca2+QBLR9h7B7mdg8oFz2g5ILz1iVouZ5DOrOrAfBmPfJjdjDT8jrO0f+iJ4/hSRrQHqHIjSnTaLUDQ== +hermes-parser@0.8.0: + version "0.8.0" + resolved "https://registry.yarnpkg.com/hermes-parser/-/hermes-parser-0.8.0.tgz#116dceaba32e45b16d6aefb5c4c830eaeba2d257" + integrity sha512-yZKalg1fTYG5eOiToLUaw69rQfZq/fi+/NtEXRU7N87K/XobNRhRWorh80oSge2lWUiZfTgUvRJH+XgZWrhoqA== dependencies: - hermes-estree "0.6.0" + hermes-estree "0.8.0" hermes-profile-transformer@^0.0.6: version "0.0.6" @@ -3294,7 +3193,7 @@ human-signals@^2.1.0: resolved "https://registry.yarnpkg.com/human-signals/-/human-signals-2.1.0.tgz#dc91fcba42e4d06e4abaed33b3e7a3c02f514ea0" integrity sha512-B4FFZ6q/T2jhhksgkbEW3HBvWIfDW85snkQgawt07S7J5QXTk6BkNV+0yAeZrM5QpMAdYlocGoljn0sJ/WQkFw== -ieee754@^1.1.13: +ieee754@^1.1.13, ieee754@^1.2.1: version "1.2.1" resolved "https://registry.yarnpkg.com/ieee754/-/ieee754-1.2.1.tgz#8eb7a10a63fff25d15a57b001586d177d1b0d352" integrity sha512-dcyqhDvX1C46lXZcVqCpK+FtMRQVdIMN6/Df5js2zouUsqG7I6sFxitIC+7KYK29KdXOLHdu9zL4sFnoVQnqaA== @@ -3355,30 +3254,11 @@ ip@^1.1.5: resolved "https://registry.yarnpkg.com/ip/-/ip-1.1.9.tgz#8dfbcc99a754d07f425310b86a99546b1151e396" integrity sha512-cyRxvOEpNHNtchU3Ln9KC/auJgup87llfQpQ+t5ghoC/UhL16SWzbueiCsdTnWmqAWl7LadfuwhlqmtOaqMHdQ== -is-accessor-descriptor@^0.1.6: - version "0.1.6" - resolved "https://registry.yarnpkg.com/is-accessor-descriptor/-/is-accessor-descriptor-0.1.6.tgz#a9e12cb3ae8d876727eeef3843f8a0897b5c98d6" - integrity sha512-e1BM1qnDbMRG3ll2U9dSK0UMHuWOs3pY3AtcFsmvwPtKL3MML/Q86i+GilLfvqEs4GW+ExB91tQ3Ig9noDIZ+A== - dependencies: - kind-of "^3.0.2" - -is-accessor-descriptor@^1.0.0: - version "1.0.0" - resolved "https://registry.yarnpkg.com/is-accessor-descriptor/-/is-accessor-descriptor-1.0.0.tgz#169c2f6d3df1f992618072365c9b0ea1f6878656" - integrity 
sha512-m5hnHTkcVsPfqx3AKlyttIPb7J+XykHvJP2B9bZDjlhLIoEq4XoK64Vg7boZlVWYK6LUY94dYPEE7Lh0ZkZKcQ== - dependencies: - kind-of "^6.0.0" - is-arrayish@^0.2.1: version "0.2.1" resolved "https://registry.yarnpkg.com/is-arrayish/-/is-arrayish-0.2.1.tgz#77c99840527aa8ecb1a8ba697b80645a7a926a9d" integrity sha512-zz06S8t0ozoDXMG+ube26zeCTNXcKIPJZJi8hBrF4idCLms4CG9QtK7qBl1boi5ODzFpjswb5JPmHCbMpjaYzg== -is-buffer@^1.1.5: - version "1.1.6" - resolved "https://registry.yarnpkg.com/is-buffer/-/is-buffer-1.1.6.tgz#efaa2ea9daa0d7ab2ea13a97b2b8ad51fefbe8be" - integrity sha512-NcdALwpXkTm5Zvvbk7owOUSvVvBKDgKP5/ewfXEznmQFfs4ZRmanOeKBTjRVjka3QFoN6XJ+9F3USqfHqTaU5w== - is-core-module@^2.11.0: version "2.12.0" resolved "https://registry.yarnpkg.com/is-core-module/-/is-core-module-2.12.0.tgz#36ad62f6f73c8253fd6472517a12483cf03e7ec4" @@ -3393,55 +3273,11 @@ is-core-module@^2.8.1: dependencies: has "^1.0.3" -is-data-descriptor@^0.1.4: - version "0.1.4" - resolved "https://registry.yarnpkg.com/is-data-descriptor/-/is-data-descriptor-0.1.4.tgz#0b5ee648388e2c860282e793f1856fec3f301b56" - integrity sha512-+w9D5ulSoBNlmw9OHn3U2v51SyoCd0he+bB3xMl62oijhrspxowjU+AIcDY0N3iEJbUEkB15IlMASQsxYigvXg== - dependencies: - kind-of "^3.0.2" - -is-data-descriptor@^1.0.0: - version "1.0.0" - resolved "https://registry.yarnpkg.com/is-data-descriptor/-/is-data-descriptor-1.0.0.tgz#d84876321d0e7add03990406abbbbd36ba9268c7" - integrity sha512-jbRXy1FmtAoCjQkVmIVYwuuqDFUbaOeDjmed1tOGPrsMhtJA4rD9tkgA0F1qJ3gRFRXcHYVkdeaP50Q5rE/jLQ== - dependencies: - kind-of "^6.0.0" - -is-descriptor@^0.1.0: - version "0.1.6" - resolved "https://registry.yarnpkg.com/is-descriptor/-/is-descriptor-0.1.6.tgz#366d8240dde487ca51823b1ab9f07a10a78251ca" - integrity sha512-avDYr0SB3DwO9zsMov0gKCESFYqCnE4hq/4z3TdUlukEy5t9C0YRq7HLrsN52NAcqXKaepeCD0n+B0arnVG3Hg== - dependencies: - is-accessor-descriptor "^0.1.6" - is-data-descriptor "^0.1.4" - kind-of "^5.0.0" - -is-descriptor@^1.0.0, is-descriptor@^1.0.2: - version "1.0.2" - resolved "https://registry.yarnpkg.com/is-descriptor/-/is-descriptor-1.0.2.tgz#3b159746a66604b04f8c81524ba365c5f14d86ec" - integrity sha512-2eis5WqQGV7peooDyLmNEPUrps9+SXX5c9pL3xEB+4e9HnGuDa7mB7kHxHw4CbqS9k1T2hOH3miL8n8WtiYVtg== - dependencies: - is-accessor-descriptor "^1.0.0" - is-data-descriptor "^1.0.0" - kind-of "^6.0.2" - is-directory@^0.3.1: version "0.3.1" resolved "https://registry.yarnpkg.com/is-directory/-/is-directory-0.3.1.tgz#61339b6f2475fc772fd9c9d83f5c8575dc154ae1" integrity sha512-yVChGzahRFvbkscn2MlwGismPO12i9+znNruC5gVEntG3qu0xQMzsGg/JFbrsqDOHtHFPci+V5aP5T9I+yeKqw== -is-extendable@^0.1.0, is-extendable@^0.1.1: - version "0.1.1" - resolved "https://registry.yarnpkg.com/is-extendable/-/is-extendable-0.1.1.tgz#62b110e289a471418e3ec36a617d472e301dfc89" - integrity sha512-5BMULNob1vgFX6EjQw5izWDxrecWK9AM72rugNr0TFldMOi0fj6Jk+zeKIt0xGj4cEfQIJth4w3OKWOJ4f+AFw== - -is-extendable@^1.0.1: - version "1.0.1" - resolved "https://registry.yarnpkg.com/is-extendable/-/is-extendable-1.0.1.tgz#a7470f9e426733d81bd81e1155264e3a3507cab4" - integrity sha512-arnXMxT1hhoKo9k1LZdmlNyJdDDfy2v0fXjFlmok4+i8ul/6WlbVge9bhM74OpNPQPMGUToDtz+KXa1PneJxOA== - dependencies: - is-plain-object "^2.0.4" - is-fullwidth-code-point@^2.0.0: version "2.0.0" resolved "https://registry.yarnpkg.com/is-fullwidth-code-point/-/is-fullwidth-code-point-2.0.0.tgz#a3b30a5c4f199183167aaab93beefae3ddfb654f" @@ -3462,13 +3298,6 @@ is-interactive@^1.0.0: resolved "https://registry.yarnpkg.com/is-interactive/-/is-interactive-1.0.0.tgz#cea6e6ae5c870a7b0a0004070b7b587e0252912e" integrity 
sha512-2HvIEKRoqS62guEC+qBjpvRubdX910WCMuJTZ+I9yvqKU2/12eSL549HMwtabb4oupdj2sMP50k+XJfB/8JE6w== -is-number@^3.0.0: - version "3.0.0" - resolved "https://registry.yarnpkg.com/is-number/-/is-number-3.0.0.tgz#24fd6201a4782cf50561c810276afc7d12d71195" - integrity sha512-4cboCqIpliH+mAvFNegjZQ4kgKc3ZUhQVr3HvWbSh5q3WH2v82ct+T2Y1hdU5Gdtorx/cLifQjqCbL7bpznLTg== - dependencies: - kind-of "^3.0.2" - is-number@^7.0.0: version "7.0.0" resolved "https://registry.yarnpkg.com/is-number/-/is-number-7.0.0.tgz#7535345b896734d5f80c4d06c50955527a14f12b" @@ -3479,7 +3308,7 @@ is-plain-obj@^2.1.0: resolved "https://registry.yarnpkg.com/is-plain-obj/-/is-plain-obj-2.1.0.tgz#45e42e37fccf1f40da8e5f76ee21515840c09287" integrity sha512-YWnfyRwxL/+SsrWYfOpUtz5b3YD+nyfkHvjbcanzk8zgyO4ASD67uVMRt8k5bM4lLMDnXfriRhOpemw+NfT1eA== -is-plain-object@^2.0.3, is-plain-object@^2.0.4: +is-plain-object@^2.0.4: version "2.0.4" resolved "https://registry.yarnpkg.com/is-plain-object/-/is-plain-object-2.0.4.tgz#2c163b3fafb1b606d9d17928f05c2a1c38e07677" integrity sha512-h5PpgXkWitc38BBMYawTYMWJHFZJVnBquFE57xFpjB8pJFiF6gZ+bU+WyI/yqXiFR5mdLsgYNaPe8uao6Uv9Og== @@ -3501,17 +3330,12 @@ is-unicode-supported@^0.1.0: resolved "https://registry.yarnpkg.com/is-unicode-supported/-/is-unicode-supported-0.1.0.tgz#3f26c76a809593b52bfa2ecb5710ed2779b522a7" integrity sha512-knxG2q4UC3u8stRGyAVJCOdxFmv5DZiRcdlIaAQXAbSfJya+OhopNotLQrstBhququ4ZpuKbDc/8S6mgXgPFPw== -is-windows@^1.0.2: - version "1.0.2" - resolved "https://registry.yarnpkg.com/is-windows/-/is-windows-1.0.2.tgz#d1850eb9791ecd18e6182ce12a30f396634bb19d" - integrity sha512-eXK1UInq2bPmjyX6e3VHIzMLobc4J94i4AWn+Hpq3OU5KkrRC96OAcR3PRJ/pGu6m8TRnBHP9dkXQVsT/COVIA== - is-wsl@^1.1.0: version "1.1.0" resolved "https://registry.yarnpkg.com/is-wsl/-/is-wsl-1.1.0.tgz#1f16e4aa22b04d1336b66188a66af3c600c3a66d" integrity sha512-gfygJYZ2gLTDlmbWMI0CE2MwnFzSN/2SZfkMlItC4K/JBlsWVDB0bO6XhqcY13YXE7iMcAJnzTCJjPiTeJJ0Mw== -isarray@1.0.0, isarray@~1.0.0: +isarray@~1.0.0: version "1.0.0" resolved "https://registry.yarnpkg.com/isarray/-/isarray-1.0.0.tgz#bb935d48582cba168c06834957a54a3e07124f11" integrity sha512-VLghIWNM6ELQzo7zwmcg0NmTVyWKYjvIeM83yjp0wRDTmUnrM678fQbcKBo6n2CJEF0szoG//ytg+TKla89ALQ== @@ -3521,14 +3345,7 @@ isexe@^2.0.0: resolved "https://registry.yarnpkg.com/isexe/-/isexe-2.0.0.tgz#e8fbf374dc556ff8947a10dcb0572d633f2cfa10" integrity sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw== -isobject@^2.0.0: - version "2.1.0" - resolved "https://registry.yarnpkg.com/isobject/-/isobject-2.1.0.tgz#f065561096a3f1da2ef46272f815c840d87e0c89" - integrity sha512-+OUdGJlgjOBZDfxnDjYYG6zp487z0JGNQq3cYQYg5f5hKR+syHMsaztzGeml/4kGG55CSpKSpWTY+jYGgsHLgA== - dependencies: - isarray "1.0.0" - -isobject@^3.0.0, isobject@^3.0.1: +isobject@^3.0.1: version "3.0.1" resolved "https://registry.yarnpkg.com/isobject/-/isobject-3.0.1.tgz#4e431e92b11a9731636aa1f9c8d1ccbcfdab78df" integrity sha512-WhB9zCku7EGTj/HQQRz5aUQEUeoQZH2bWcltRErOpymJ4boYE6wL9Tbr23krRPSZ+C5zqNSrSw+Cc7sZZ4b7vg== @@ -3705,26 +3522,6 @@ jest-get-type@^29.4.3: resolved "https://registry.yarnpkg.com/jest-get-type/-/jest-get-type-29.4.3.tgz#1ab7a5207c995161100b5187159ca82dd48b3dd5" integrity sha512-J5Xez4nRRMjk8emnTpWrlkyb9pfRQQanDrvWHhsR1+VUfbwxi30eVcZFlcdGInRibU4G5LwHXpI7IRHU0CY+gg== -jest-haste-map@^27.3.1: - version "27.5.1" - resolved "https://registry.yarnpkg.com/jest-haste-map/-/jest-haste-map-27.5.1.tgz#9fd8bd7e7b4fa502d9c6164c5640512b4e811e7f" - integrity 
sha512-7GgkZ4Fw4NFbMSDSpZwXeBiIbx+t/46nJ2QitkOjvwPYyZmqttu2TDSimMHP1EkPOi4xUZAN1doE5Vd25H4Jng== - dependencies: - "@jest/types" "^27.5.1" - "@types/graceful-fs" "^4.1.2" - "@types/node" "*" - anymatch "^3.0.3" - fb-watchman "^2.0.0" - graceful-fs "^4.2.9" - jest-regex-util "^27.5.1" - jest-serializer "^27.5.1" - jest-util "^27.5.1" - jest-worker "^27.5.1" - micromatch "^4.0.4" - walker "^1.0.7" - optionalDependencies: - fsevents "^2.3.2" - jest-haste-map@^29.5.0: version "29.5.0" resolved "https://registry.yarnpkg.com/jest-haste-map/-/jest-haste-map-29.5.0.tgz#69bd67dc9012d6e2723f20a945099e972b2e94de" @@ -3744,6 +3541,16 @@ jest-haste-map@^29.5.0: optionalDependencies: fsevents "^2.3.2" +jest-junit@^16.0.0: + version "16.0.0" + resolved "https://registry.yarnpkg.com/jest-junit/-/jest-junit-16.0.0.tgz#d838e8c561cf9fdd7eb54f63020777eee4136785" + integrity sha512-A94mmw6NfJab4Fg/BlvVOUXzXgF0XIH6EmTgJ5NDPp4xoKq0Kr7sErb+4Xs9nZvu58pJojz5RFGpqnZYJTrRfQ== + dependencies: + mkdirp "^1.0.4" + strip-ansi "^6.0.1" + uuid "^8.3.2" + xml "^1.0.1" + jest-leak-detector@^29.5.0: version "29.5.0" resolved "https://registry.yarnpkg.com/jest-leak-detector/-/jest-leak-detector-29.5.0.tgz#cf4bdea9615c72bac4a3a7ba7e7930f9c0610c8c" @@ -3791,7 +3598,7 @@ jest-pnp-resolver@^1.2.2: resolved "https://registry.yarnpkg.com/jest-pnp-resolver/-/jest-pnp-resolver-1.2.3.tgz#930b1546164d4ad5937d5540e711d4d38d4cad2e" integrity sha512-+3NpwQEnRoIBtx4fyhblQDPgJI0H1IEIkX7ShLUjPGA7TtUTvI1oiKi3SR4oBR0hQhQR80l4WAe5RrXBwWMA8w== -jest-regex-util@^27.5.1: +jest-regex-util@^27.0.6: version "27.5.1" resolved "https://registry.yarnpkg.com/jest-regex-util/-/jest-regex-util-27.5.1.tgz#4da143f7e9fd1e542d4aa69617b38e4a78365b95" integrity sha512-4bfKq2zie+x16okqDXjXn9ql2B0dScQu+vcwe4TvFVhkVyuWLqpZrZtXxLLWoXYgn0E87I6r6GRYHF7wFZBUvg== @@ -3879,7 +3686,7 @@ jest-runtime@^29.5.0: slash "^3.0.0" strip-bom "^4.0.0" -jest-serializer@^27.5.1: +jest-serializer@^27.0.6: version "27.5.1" resolved "https://registry.yarnpkg.com/jest-serializer/-/jest-serializer-27.5.1.tgz#81438410a30ea66fd57ff730835123dea1fb1f64" integrity sha512-jZCyo6iIxO1aqUxpuBlwTDMkzOAJS4a3eYz3YzgxxVQFwLeSA7Jfq5cbqCY+JLvTDrWirgusI/0KwxKMgrdf7w== @@ -3916,7 +3723,7 @@ jest-snapshot@^29.5.0: pretty-format "^29.5.0" semver "^7.3.5" -jest-util@^27.5.1: +jest-util@^27.2.0: version "27.5.1" resolved "https://registry.yarnpkg.com/jest-util/-/jest-util-27.5.1.tgz#3ba9771e8e31a0b85da48fe0b0891fb86c01c2f9" integrity sha512-Kv2o/8jNvX1MQ0KGtw480E/w4fBCDOnH6+6DmeKi6LZUIlKA5kwY0YNdlzaWTiVgxqAqik11QyxDOKk543aKXw== @@ -3978,7 +3785,7 @@ jest-watcher@^29.5.0: jest-util "^29.5.0" string-length "^4.0.1" -jest-worker@^27.2.0, jest-worker@^27.5.1: +jest-worker@^27.2.0: version "27.5.1" resolved "https://registry.yarnpkg.com/jest-worker/-/jest-worker-27.5.1.tgz#8d146f0900e8973b106b6f73cc1e9a8cb86f8db0" integrity sha512-7vuh85V5cdDofPyxn58nrPjBktZo0u9x1g8WtjQol+jZDaE+fhN+cIvTj11GndBnMnyfrUOG1sZQxCdjKh+DKg== @@ -4007,11 +3814,6 @@ jest@^29: import-local "^3.0.2" jest-cli "^29.5.0" -jetifier@^1.6.2: - version "1.6.8" - resolved "https://registry.yarnpkg.com/jetifier/-/jetifier-1.6.8.tgz#e88068697875cbda98c32472902c4d3756247798" - integrity sha512-3Zi16h6L5tXDRQJTb221cnRoVG9/9OvreLdLU2/ZjRv/GILL+2Cemt0IKvkowwkDpvouAU1DQPOJ7qaiHeIdrw== - joi@^17.2.1: version "17.6.0" resolved "https://registry.yarnpkg.com/joi/-/joi-17.6.0.tgz#0bb54f2f006c09a96e75ce687957bd04290054b2" @@ -4053,10 +3855,15 @@ jsc-android@^250230.2.1: resolved 
"https://registry.yarnpkg.com/jsc-android/-/jsc-android-250230.2.1.tgz#3790313a970586a03ab0ad47defbc84df54f1b83" integrity sha512-KmxeBlRjwoqCnBBKGsihFtvsBHyUFlBxJPK4FzeYcIuBfdjv6jFys44JITAgSTbQD+vIdwMEfyZklsuQX0yI1Q== -jscodeshift@^0.13.1: - version "0.13.1" - resolved "https://registry.yarnpkg.com/jscodeshift/-/jscodeshift-0.13.1.tgz#69bfe51e54c831296380585c6d9e733512aecdef" - integrity sha512-lGyiEbGOvmMRKgWk4vf+lUrCWO/8YR8sUR3FKF1Cq5fovjZDlIcw3Hu5ppLHAnEXshVffvaM0eyuY/AbOeYpnQ== +jsc-safe-url@^0.2.2: + version "0.2.4" + resolved "https://registry.yarnpkg.com/jsc-safe-url/-/jsc-safe-url-0.2.4.tgz#141c14fbb43791e88d5dc64e85a374575a83477a" + integrity sha512-0wM3YBWtYePOjfyXQH5MWQ8H7sdk5EXSwZvmSLKk2RboVQ2Bu239jycHDz5J/8Blf3K0Qnoy2b6xD+z10MFB+Q== + +jscodeshift@^0.14.0: + version "0.14.0" + resolved "https://registry.yarnpkg.com/jscodeshift/-/jscodeshift-0.14.0.tgz#7542e6715d6d2e8bde0b4e883f0ccea358b46881" + integrity sha512-7eCC1knD7bLUPuSCwXsMZUH51O8jIcoVyKtI6P0XM0IVzlGjckPy3FIwQlorzbN0Sg79oK+RlohN32Mqf/lrYA== dependencies: "@babel/core" "^7.13.16" "@babel/parser" "^7.13.16" @@ -4071,10 +3878,10 @@ jscodeshift@^0.13.1: chalk "^4.1.2" flow-parser "0.*" graceful-fs "^4.2.4" - micromatch "^3.1.10" + micromatch "^4.0.4" neo-async "^2.5.0" node-dir "^0.1.17" - recast "^0.20.4" + recast "^0.21.0" temp "^0.8.4" write-file-atomic "^2.3.0" @@ -4137,26 +3944,7 @@ jsonfile@^4.0.0: optionalDependencies: graceful-fs "^4.1.6" -kind-of@^3.0.2, kind-of@^3.0.3, kind-of@^3.2.0: - version "3.2.2" - resolved "https://registry.yarnpkg.com/kind-of/-/kind-of-3.2.2.tgz#31ea21a734bab9bbb0f32466d893aea51e4a3c64" - integrity sha512-NOW9QQXMoZGg/oqnVNoNTTIFEIid1627WCffUBJEdMxYApq7mNE7CpzucIPc+ZQg25Phej7IJSmX3hO+oblOtQ== - dependencies: - is-buffer "^1.1.5" - -kind-of@^4.0.0: - version "4.0.0" - resolved "https://registry.yarnpkg.com/kind-of/-/kind-of-4.0.0.tgz#20813df3d712928b207378691a45066fae72dd57" - integrity sha512-24XsCxmEbRwEDbz/qz3stgin8TTzZ1ESR56OMCN0ujYg+vRutNSiOj9bHH9u85DKgXguraugV5sFuvbD4FW/hw== - dependencies: - is-buffer "^1.1.5" - -kind-of@^5.0.0: - version "5.1.0" - resolved "https://registry.yarnpkg.com/kind-of/-/kind-of-5.1.0.tgz#729c91e2d857b7a419a1f9aa65685c4c33f5845d" - integrity sha512-NGEErnH6F2vUuXDh+OlbcKW7/wOcfdRHaZ7VWtqCztfHri/++YKmP51OdWeGPuqCOba6kk2OTe5d02VmTB80Pw== - -kind-of@^6.0.0, kind-of@^6.0.2: +kind-of@^6.0.2: version "6.0.3" resolved "https://registry.yarnpkg.com/kind-of/-/kind-of-6.0.3.tgz#07c05034a6c349fa06e24fa35aa76db4580ce4dd" integrity sha512-dcS1ul+9tmeD95T+x28/ehLgd9mENa3LsvDTtzm3vyBEO7RPptvAD+t44WVXaUjTBRcrpFeFlC8WCruUR456hw== @@ -4215,7 +4003,7 @@ lodash.throttle@^4.1.1: resolved "https://registry.yarnpkg.com/lodash.throttle/-/lodash.throttle-4.1.1.tgz#c23e91b710242ac70c37f1e1cda9274cc39bf2f4" integrity sha512-wIkUCfVKpVsWo3JSZlc+8MB5it+2AN5W8J7YVMST30UrvcQNZ1Okbj+rbVniijTWE6FGYy4XJq/rHkas8qJMLQ== -lodash@^4.17.11, lodash@^4.17.15, lodash@^4.17.21: +lodash@^4.17.11, lodash@^4.17.21: version "4.17.21" resolved "https://registry.yarnpkg.com/lodash/-/lodash-4.17.21.tgz#679591c564c3bffaae8454cf0b3df370c3d6911c" integrity sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg== @@ -4288,18 +4076,6 @@ makeerror@1.0.12: dependencies: tmpl "1.0.5" -map-cache@^0.2.2: - version "0.2.2" - resolved "https://registry.yarnpkg.com/map-cache/-/map-cache-0.2.2.tgz#c32abd0bd6525d9b051645bb4f26ac5dc98a0dbf" - integrity sha512-8y/eV9QQZCiyn1SprXSrCmqJN0yNRATe+PO8ztwqrvrbdRLA3eYJF0yaR0YayLWkMbsQSKWS9N2gPcGEc4UsZg== - -map-visit@^1.0.0: - version 
"1.0.0" - resolved "https://registry.yarnpkg.com/map-visit/-/map-visit-1.0.0.tgz#ecdca8f13144e660f1b5bd41f12f3479d98dfb8f" - integrity sha512-4y7uGv8bd2WdM9vpQsiQNo41Ln1NvhvDRuVt0k2JZQ+ezN2uaQes7lZeZ+QQUHOLQAtDaBJ+7wCbi+ab/KFs+w== - dependencies: - object-visit "^1.0.0" - memoize-one@^5.0.0: version "5.2.1" resolved "https://registry.yarnpkg.com/memoize-one/-/memoize-one-5.2.1.tgz#8337aa3c4335581839ec01c3d594090cebe8f00e" @@ -4310,76 +4086,95 @@ merge-stream@^2.0.0: resolved "https://registry.yarnpkg.com/merge-stream/-/merge-stream-2.0.0.tgz#52823629a14dd00c9770fb6ad47dc6310f2c1f60" integrity sha512-abv/qOcuPfk3URPfDzmZU1LKmuw8kT+0nIHvKrKgFrwifol/doWcdA4ZqsWQ8ENrFKkd67Mfpo/LovbIUsbt3w== -metro-babel-transformer@0.70.3: - version "0.70.3" - resolved "https://registry.yarnpkg.com/metro-babel-transformer/-/metro-babel-transformer-0.70.3.tgz#dca61852be273824a4b641bd1ecafff07ff3ad1f" - integrity sha512-bWhZRMn+mIOR/s3BDpFevWScz9sV8FGktVfMlF1eJBLoX24itHDbXvTktKBYi38PWIKcHedh6THSFpJogfuwNA== +metro-babel-transformer@0.72.4: + version "0.72.4" + resolved "https://registry.yarnpkg.com/metro-babel-transformer/-/metro-babel-transformer-0.72.4.tgz#5149424896797980aa1758c8ef7c9a80f9d0f587" + integrity sha512-cg1TQUKDkKqrIClrqqIGE8ZDa9kRKSjhBtqPtNYt/ZSywXU41SrldfcI5uzPrzcIrYpH5hnN6OCLRACPgy2vsw== dependencies: "@babel/core" "^7.14.0" - hermes-parser "0.6.0" - metro-source-map "0.70.3" + hermes-parser "0.8.0" + metro-source-map "0.72.4" nullthrows "^1.1.1" -metro-cache-key@0.70.3: - version "0.70.3" - resolved "https://registry.yarnpkg.com/metro-cache-key/-/metro-cache-key-0.70.3.tgz#898803db04178a8f440598afba7d82a9cf35abf7" - integrity sha512-0zpw+IcpM3hmGd5sKMdxNv3sbOIUYnMUvx1/yaM6vNRReSPmOLX0bP8fYf3CGgk8NEreZ1OHbVsuw7bdKt40Mw== +metro-cache-key@0.72.4: + version "0.72.4" + resolved "https://registry.yarnpkg.com/metro-cache-key/-/metro-cache-key-0.72.4.tgz#f03d49214554b25968f04dc5e19dfe018cf9312b" + integrity sha512-DH3cgN4L7IKNCVBy8LBOXQ4tHDdvh7Vl7jWNkQKMOfHWu1EwsTtXD/+zdV7/be4ls/kHxrD0HbGzpK8XhUAHSw== -metro-cache@0.70.3: - version "0.70.3" - resolved "https://registry.yarnpkg.com/metro-cache/-/metro-cache-0.70.3.tgz#42cf3cdf8a7b3691f3bef9a86bed38d4c5f6201f" - integrity sha512-iCix/+z812fUqa6KlOxaTkY6LQQDoXIe/VljXkGIvpygSCmYyhjQpfQVZEVVPezFmUBYXNdabdQ6cYx6JX3yMg== +metro-cache@0.72.4: + version "0.72.4" + resolved "https://registry.yarnpkg.com/metro-cache/-/metro-cache-0.72.4.tgz#e0ffb33dd044a7cf5897a09489088a413bfe7468" + integrity sha512-76fi9OVytiFVSuGQcNoquVOT7AENd0q3n1WmyBeJ7jvl/UrE3/NN3HTWzu2ezG5IxF3cmo5q1ehi0NEpgwaFGg== dependencies: - metro-core "0.70.3" + metro-core "0.72.4" rimraf "^2.5.4" -metro-config@0.70.3, metro-config@^0.70.1: - version "0.70.3" - resolved "https://registry.yarnpkg.com/metro-config/-/metro-config-0.70.3.tgz#fe6f7330f679d5594e5724af7a69d4dbe1bb5bc3" - integrity sha512-SSCDjSTygoCgzoj61DdrBeJzZDRwQxUEfcgc6t6coxWSExXNR4mOngz0q4SAam49Bmjq9J2Jft6qUKnUTPrRgA== +metro-config@0.72.4: + version "0.72.4" + resolved "https://registry.yarnpkg.com/metro-config/-/metro-config-0.72.4.tgz#3ad42b3ca0037125d5615f4cb7e1c7ed9442bedd" + integrity sha512-USv+H14D5RrSpfA5t4t5cbF1CnizgYGz6xJ3HB0r/bDYdJdZTVqB3/mMPft7Z5zHslS00JCG7oE51G1CK/FlKw== dependencies: cosmiconfig "^5.0.5" jest-validate "^26.5.2" - metro "0.70.3" - metro-cache "0.70.3" - metro-core "0.70.3" - metro-runtime "0.70.3" + metro "0.72.4" + metro-cache "0.72.4" + metro-core "0.72.4" + metro-runtime "0.72.4" -metro-core@0.70.3, metro-core@^0.70.1: - version "0.70.3" - resolved 
"https://registry.yarnpkg.com/metro-core/-/metro-core-0.70.3.tgz#bf4dda15a5185f5a7931de463a1b97ac9ef680a0" - integrity sha512-NzfHB/w5R7yLaOeU1tzPTbBzCRsYSvpKJkLMP0yudszKZzIAZqNdjoEJ9GZ688Wi0ynZxcU0BxukXh4my80ZBw== +metro-core@0.72.4: + version "0.72.4" + resolved "https://registry.yarnpkg.com/metro-core/-/metro-core-0.72.4.tgz#e4939aef4c50d953c44eee99a3c971d5162f1287" + integrity sha512-2JNT1nG0UV1uMrQHQOKUSII0sdS6MhVT3mBt2kwfjCvD+jvi1iYhKJ4kYCRlUQw9XNLGZ/B+C0VDQzlf2M3zVw== dependencies: - jest-haste-map "^27.3.1" lodash.throttle "^4.1.1" - metro-resolver "0.70.3" + metro-resolver "0.72.4" -metro-hermes-compiler@0.70.3: - version "0.70.3" - resolved "https://registry.yarnpkg.com/metro-hermes-compiler/-/metro-hermes-compiler-0.70.3.tgz#ac7ed656fbcf0a59adcd010d3639e4cfdbc76b4f" - integrity sha512-W6WttLi4E72JL/NyteQ84uxYOFMibe0PUr9aBKuJxxfCq6QRnJKOVcNY0NLW0He2tneXGk+8ZsNz8c0flEvYqg== +metro-file-map@0.72.4: + version "0.72.4" + resolved "https://registry.yarnpkg.com/metro-file-map/-/metro-file-map-0.72.4.tgz#8a0c8a0e44d665af90dded2ac6e01baebff8552e" + integrity sha512-Mv5WgTsYs5svTR/df6jhq2aD4IkAuwV5TutHW0BfEg1YccQt8/v7q5ZypmUOkjdSS9bFR4r3677jalr/ceFypQ== + dependencies: + abort-controller "^3.0.0" + anymatch "^3.0.3" + debug "^2.2.0" + fb-watchman "^2.0.0" + graceful-fs "^4.2.4" + invariant "^2.2.4" + jest-regex-util "^27.0.6" + jest-serializer "^27.0.6" + jest-util "^27.2.0" + jest-worker "^27.2.0" + micromatch "^4.0.4" + walker "^1.0.7" + optionalDependencies: + fsevents "^2.1.2" -metro-inspector-proxy@0.70.3: - version "0.70.3" - resolved "https://registry.yarnpkg.com/metro-inspector-proxy/-/metro-inspector-proxy-0.70.3.tgz#321c25b2261e76d8c4bcc39e092714adfcb50a14" - integrity sha512-qQoNdPGrmyoJSWYkxSDpTaAI8xyqVdNDVVj9KRm1PG8niSuYmrCCFGLLFsMvkVYwsCWUGHoGBx0UoAzVp14ejw== +metro-hermes-compiler@0.72.4: + version "0.72.4" + resolved "https://registry.yarnpkg.com/metro-hermes-compiler/-/metro-hermes-compiler-0.72.4.tgz#06c946d74720d5132fa1690df0610ba367d3436c" + integrity sha512-AY1mAT5FKfDRYCthuKo2XHbuhG5TUV4ZpZlJ8peIgkiWICzfy0tau3yu+3jUD456N90CjMCOmdknji4uKiZ8ww== + +metro-inspector-proxy@0.72.4: + version "0.72.4" + resolved "https://registry.yarnpkg.com/metro-inspector-proxy/-/metro-inspector-proxy-0.72.4.tgz#347e9634b6204c38117292edfb11eb2df71c09ad" + integrity sha512-pr+PsbNCZaStWuJRH8oclT170B7NxfgH+UUyTf9/aR+7PjX0gdDabJhPyzA633QgR+EFBaQKZuetHA+f5/cnEQ== dependencies: connect "^3.6.5" debug "^2.2.0" ws "^7.5.1" yargs "^15.3.1" -metro-minify-uglify@0.70.3: - version "0.70.3" - resolved "https://registry.yarnpkg.com/metro-minify-uglify/-/metro-minify-uglify-0.70.3.tgz#2f28129ca5b8ef958f3e3fcf004c3707c7732e1e" - integrity sha512-oHyjV9WDqOlDE1FPtvs6tIjjeY/oP1PNUPYL1wqyYtqvjN+zzAOrcbsAAL1sv+WARaeiMsWkF2bwtNo+Hghoog== +metro-minify-uglify@0.72.4: + version "0.72.4" + resolved "https://registry.yarnpkg.com/metro-minify-uglify/-/metro-minify-uglify-0.72.4.tgz#b4504adc17f093173c0e5d44df32ac9e13f50a88" + integrity sha512-84Rrgie3O7Dqkak9ep/eIpMZkEFzpKD4bngPUNimYqAMCExKL7/aymydB27gKcqwus/BVkAV+aOnFsuOhlgnQg== dependencies: uglify-es "^3.1.9" -metro-react-native-babel-preset@0.70.3: - version "0.70.3" - resolved "https://registry.yarnpkg.com/metro-react-native-babel-preset/-/metro-react-native-babel-preset-0.70.3.tgz#1c77ec4544ecd5fb6c803e70b21284d7483e4842" - integrity sha512-4Nxc1zEiHEu+GTdEMEsHnRgfaBkg8f/Td3+FcQ8NTSvs+xL3LBrQy6N07idWSQZHIdGFf+tTHvRfSIWLD8u8Tg== +metro-react-native-babel-preset@0.72.4: + version "0.72.4" + resolved 
"https://registry.yarnpkg.com/metro-react-native-babel-preset/-/metro-react-native-babel-preset-0.72.4.tgz#2b320772d2489d1fb3a6413fc58dad13a56eea0e" + integrity sha512-YGCVaYe1H5fOFktdDdL9IwAyiXjPh1t2eZZFp3KFJak6fxKpN+q5PPhe1kzMa77dbCAqgImv43zkfGa6i27eyA== dependencies: "@babel/core" "^7.14.0" "@babel/plugin-proposal-async-generator-functions" "^7.0.0" @@ -4421,109 +4216,64 @@ metro-react-native-babel-preset@0.70.3: "@babel/template" "^7.0.0" react-refresh "^0.4.0" -metro-react-native-babel-preset@^0.67.0: - version "0.67.0" - resolved "https://registry.yarnpkg.com/metro-react-native-babel-preset/-/metro-react-native-babel-preset-0.67.0.tgz#53aec093f53a09b56236a9bb534d76658efcbec7" - integrity sha512-tgTG4j0SKwLHbLRELMmgkgkjV1biYkWlGGKOmM484/fJC6bpDikdaFhfjsyE+W+qt7I5szbCPCickMTNQ+zwig== - dependencies: - "@babel/core" "^7.14.0" - "@babel/plugin-proposal-class-properties" "^7.0.0" - "@babel/plugin-proposal-export-default-from" "^7.0.0" - "@babel/plugin-proposal-nullish-coalescing-operator" "^7.0.0" - "@babel/plugin-proposal-object-rest-spread" "^7.0.0" - "@babel/plugin-proposal-optional-catch-binding" "^7.0.0" - "@babel/plugin-proposal-optional-chaining" "^7.0.0" - "@babel/plugin-syntax-dynamic-import" "^7.0.0" - "@babel/plugin-syntax-export-default-from" "^7.0.0" - "@babel/plugin-syntax-flow" "^7.2.0" - "@babel/plugin-syntax-nullish-coalescing-operator" "^7.0.0" - "@babel/plugin-syntax-optional-chaining" "^7.0.0" - "@babel/plugin-transform-arrow-functions" "^7.0.0" - "@babel/plugin-transform-async-to-generator" "^7.0.0" - "@babel/plugin-transform-block-scoping" "^7.0.0" - "@babel/plugin-transform-classes" "^7.0.0" - "@babel/plugin-transform-computed-properties" "^7.0.0" - "@babel/plugin-transform-destructuring" "^7.0.0" - "@babel/plugin-transform-exponentiation-operator" "^7.0.0" - "@babel/plugin-transform-flow-strip-types" "^7.0.0" - "@babel/plugin-transform-for-of" "^7.0.0" - "@babel/plugin-transform-function-name" "^7.0.0" - "@babel/plugin-transform-literals" "^7.0.0" - "@babel/plugin-transform-modules-commonjs" "^7.0.0" - "@babel/plugin-transform-object-assign" "^7.0.0" - "@babel/plugin-transform-parameters" "^7.0.0" - "@babel/plugin-transform-react-display-name" "^7.0.0" - "@babel/plugin-transform-react-jsx" "^7.0.0" - "@babel/plugin-transform-react-jsx-self" "^7.0.0" - "@babel/plugin-transform-react-jsx-source" "^7.0.0" - "@babel/plugin-transform-regenerator" "^7.0.0" - "@babel/plugin-transform-runtime" "^7.0.0" - "@babel/plugin-transform-shorthand-properties" "^7.0.0" - "@babel/plugin-transform-spread" "^7.0.0" - "@babel/plugin-transform-sticky-regex" "^7.0.0" - "@babel/plugin-transform-template-literals" "^7.0.0" - "@babel/plugin-transform-typescript" "^7.5.0" - "@babel/plugin-transform-unicode-regex" "^7.0.0" - "@babel/template" "^7.0.0" - react-refresh "^0.4.0" - -metro-react-native-babel-transformer@0.70.3, metro-react-native-babel-transformer@^0.70.1: - version "0.70.3" - resolved "https://registry.yarnpkg.com/metro-react-native-babel-transformer/-/metro-react-native-babel-transformer-0.70.3.tgz#195597c32488f820aa9e441bbca7c04fe7de7a2d" - integrity sha512-WKBU6S/G50j9cfmFM4k4oRYprd8u3qjleD4so1E2zbTNILg+gYla7ZFGCAvi2G0ZcqS2XuGCR375c2hF6VVvwg== +metro-react-native-babel-transformer@0.72.4: + version "0.72.4" + resolved "https://registry.yarnpkg.com/metro-react-native-babel-transformer/-/metro-react-native-babel-transformer-0.72.4.tgz#c1a38bf28513374dbb0fce45b4017d8abfe4a071" + integrity 
sha512-VxM8Cki+/tPAyQRPHEy1bsxAihpxz8cGLdteFo9t0eAJI7/vEegqICxQm4A+RiGQc4f8t2jiwI6YpnDWomI5Gw== dependencies: "@babel/core" "^7.14.0" babel-preset-fbjs "^3.4.0" - hermes-parser "0.6.0" - metro-babel-transformer "0.70.3" - metro-react-native-babel-preset "0.70.3" - metro-source-map "0.70.3" + hermes-parser "0.8.0" + metro-babel-transformer "0.72.4" + metro-react-native-babel-preset "0.72.4" + metro-source-map "0.72.4" nullthrows "^1.1.1" -metro-resolver@0.70.3, metro-resolver@^0.70.1: - version "0.70.3" - resolved "https://registry.yarnpkg.com/metro-resolver/-/metro-resolver-0.70.3.tgz#c64fdd6d0a88fa62f3f99f87e539b5f603bd47bf" - integrity sha512-5Pc5S/Gs4RlLbziuIWtvtFd9GRoILlaRC8RZDVq5JZWcWHywKy/PjNmOBNhpyvtRlzpJfy/ssIfLhu8zINt1Mw== +metro-resolver@0.72.4: + version "0.72.4" + resolved "https://registry.yarnpkg.com/metro-resolver/-/metro-resolver-0.72.4.tgz#37893ff72273a2b7ea529564caa15fe2e2337267" + integrity sha512-aHxq/jypzGyi9Ic9woe//RymfxpzWliAkyTmBWPHE9ypGoiobstK0me2j5XuSfzASzCU8wcVt20qy870rxTWLw== dependencies: absolute-path "^0.0.0" -metro-runtime@0.70.3, metro-runtime@^0.70.1: - version "0.70.3" - resolved "https://registry.yarnpkg.com/metro-runtime/-/metro-runtime-0.70.3.tgz#09231b9d05dcbdfb5a13df0a45307273e6fe1168" - integrity sha512-22xU7UdXZacniTIDZgN2EYtmfau2pPyh97Dcs+cWrLcJYgfMKjWBtesnDcUAQy3PHekDYvBdJZkoQUeskYTM+w== +metro-runtime@0.72.4: + version "0.72.4" + resolved "https://registry.yarnpkg.com/metro-runtime/-/metro-runtime-0.72.4.tgz#b3469fd040a9526bfd897c0517c5f052a059ddeb" + integrity sha512-EA0ltqyYFpjOdpoRqE2U9FJleqTOIK+ZLRlLaDrx4yz3zTqUZ16W6w71dq+qrwD8BPg7bPKQu7RluU3K6tI79A== dependencies: "@babel/runtime" "^7.0.0" + react-refresh "^0.4.0" -metro-source-map@0.70.3: - version "0.70.3" - resolved "https://registry.yarnpkg.com/metro-source-map/-/metro-source-map-0.70.3.tgz#f5976108c18d4661eaa4d188c96713e5d67a903b" - integrity sha512-zsYtZGrwRbbGEFHtmMqqeCH9K9aTGNVPsurMOWCUeQA3VGyVGXPGtLMC+CdAM9jLpUyg6jw2xh0esxi+tYH7Uw== +metro-source-map@0.72.4: + version "0.72.4" + resolved "https://registry.yarnpkg.com/metro-source-map/-/metro-source-map-0.72.4.tgz#3c6444bba22b84d7d7e383f784a1d59e724192de" + integrity sha512-P09aMDEPkLo6BM8VYYoTsH/2B1w6t+mrCwNcNJV1zE+57FPiU4fSBlSeM8G9YeYaezDTHimS2JlMozP+2r+trA== dependencies: "@babel/traverse" "^7.14.0" "@babel/types" "^7.0.0" invariant "^2.2.4" - metro-symbolicate "0.70.3" + metro-symbolicate "0.72.4" nullthrows "^1.1.1" - ob1 "0.70.3" + ob1 "0.72.4" source-map "^0.5.6" vlq "^1.0.0" -metro-symbolicate@0.70.3: - version "0.70.3" - resolved "https://registry.yarnpkg.com/metro-symbolicate/-/metro-symbolicate-0.70.3.tgz#b039e5629c4ed0c999ea0496d580e1c98260f5cb" - integrity sha512-JTYkF1dpeDUssQ84juE1ycnhHki2ylJBBdJE1JHtfu5oC+z1ElDbBdPHq90Uvt8HbRov/ZAnxvv7Zy6asS+WCA== +metro-symbolicate@0.72.4: + version "0.72.4" + resolved "https://registry.yarnpkg.com/metro-symbolicate/-/metro-symbolicate-0.72.4.tgz#3be7c9d1f382fc58198efcb515f2de0ec3fc4181" + integrity sha512-6ZRo66Q4iKiwaQuHjmogkSCCqaSpJ4QzbHsVHRUe57mFIL34lOLYp7aPfmX7NHCmy061HhDox/kGuYZQRmHB3A== dependencies: invariant "^2.2.4" - metro-source-map "0.70.3" + metro-source-map "0.72.4" nullthrows "^1.1.1" source-map "^0.5.6" through2 "^2.0.1" vlq "^1.0.0" -metro-transform-plugins@0.70.3: - version "0.70.3" - resolved "https://registry.yarnpkg.com/metro-transform-plugins/-/metro-transform-plugins-0.70.3.tgz#7fe87cd0d8979b4d5d6e375751d86188fff38fd9" - integrity sha512-dQRIJoTkWZN2IVS2KzgS1hs7ZdHDX3fS3esfifPkqFAEwHiLctCf0EsPgIknp0AjMLvmGWfSLJigdRB/dc0ASw== +metro-transform-plugins@0.72.4: + 
version "0.72.4" + resolved "https://registry.yarnpkg.com/metro-transform-plugins/-/metro-transform-plugins-0.72.4.tgz#01e95aa277216fb0887610067125fac9271d399e" + integrity sha512-yxB4v/LxQkmN1rjyyeLiV4x+jwCmId4FTTxNrmTYoi0tFPtOBOeSwuqY08LjxZQMJdZOKXqj2bgIewqFXJEkGw== dependencies: "@babel/core" "^7.14.0" "@babel/generator" "^7.14.0" @@ -4531,29 +4281,29 @@ metro-transform-plugins@0.70.3: "@babel/traverse" "^7.14.0" nullthrows "^1.1.1" -metro-transform-worker@0.70.3: - version "0.70.3" - resolved "https://registry.yarnpkg.com/metro-transform-worker/-/metro-transform-worker-0.70.3.tgz#62bfa28ebef98803531c4bcb558de5fc804c94ef" - integrity sha512-MtVVsnHhhBOp9GRLCdAb2mD1dTCsIzT4+m34KMRdBDCEbDIb90YafT5prpU8qbj5uKd0o2FOQdrJ5iy5zQilHw== +metro-transform-worker@0.72.4: + version "0.72.4" + resolved "https://registry.yarnpkg.com/metro-transform-worker/-/metro-transform-worker-0.72.4.tgz#356903c343dc62373b928b4325ad09a103398cc5" + integrity sha512-mIvzy6nRQKMALEdF5g8LXPgCOUi/tGESE5dlb7OSMCj2FAFBm3mTLRrpW5phzK/J6Wg+4Vb9PMS+wGbXR261rA== dependencies: "@babel/core" "^7.14.0" "@babel/generator" "^7.14.0" "@babel/parser" "^7.14.0" "@babel/types" "^7.0.0" babel-preset-fbjs "^3.4.0" - metro "0.70.3" - metro-babel-transformer "0.70.3" - metro-cache "0.70.3" - metro-cache-key "0.70.3" - metro-hermes-compiler "0.70.3" - metro-source-map "0.70.3" - metro-transform-plugins "0.70.3" + metro "0.72.4" + metro-babel-transformer "0.72.4" + metro-cache "0.72.4" + metro-cache-key "0.72.4" + metro-hermes-compiler "0.72.4" + metro-source-map "0.72.4" + metro-transform-plugins "0.72.4" nullthrows "^1.1.1" -metro@0.70.3, metro@^0.70.1: - version "0.70.3" - resolved "https://registry.yarnpkg.com/metro/-/metro-0.70.3.tgz#4290f538ab5446c7050e718b5c5823eea292c5c2" - integrity sha512-uEWS7xg8oTetQDABYNtsyeUjdLhH3KAvLFpaFFoJqUpOk2A3iygszdqmjobFl6W4zrvKDJS+XxdMR1roYvUhTw== +metro@0.72.4: + version "0.72.4" + resolved "https://registry.yarnpkg.com/metro/-/metro-0.72.4.tgz#fdfc43b3329388b5a3e8856727403f93a8c05250" + integrity sha512-UBqL2fswJjsq2LlfMPV4ArqzLzjyN0nReKRijP3DdSxZiaJDG4NC9sQoVJHbH1HP5qXQMAK/SftyAx1c1kuy+w== dependencies: "@babel/code-frame" "^7.0.0" "@babel/core" "^7.14.0" @@ -4573,27 +4323,28 @@ metro@0.70.3, metro@^0.70.1: error-stack-parser "^2.0.6" fs-extra "^1.0.0" graceful-fs "^4.2.4" - hermes-parser "0.6.0" + hermes-parser "0.8.0" image-size "^0.6.0" invariant "^2.2.4" - jest-haste-map "^27.3.1" jest-worker "^27.2.0" + jsc-safe-url "^0.2.2" lodash.throttle "^4.1.1" - metro-babel-transformer "0.70.3" - metro-cache "0.70.3" - metro-cache-key "0.70.3" - metro-config "0.70.3" - metro-core "0.70.3" - metro-hermes-compiler "0.70.3" - metro-inspector-proxy "0.70.3" - metro-minify-uglify "0.70.3" - metro-react-native-babel-preset "0.70.3" - metro-resolver "0.70.3" - metro-runtime "0.70.3" - metro-source-map "0.70.3" - metro-symbolicate "0.70.3" - metro-transform-plugins "0.70.3" - metro-transform-worker "0.70.3" + metro-babel-transformer "0.72.4" + metro-cache "0.72.4" + metro-cache-key "0.72.4" + metro-config "0.72.4" + metro-core "0.72.4" + metro-file-map "0.72.4" + metro-hermes-compiler "0.72.4" + metro-inspector-proxy "0.72.4" + metro-minify-uglify "0.72.4" + metro-react-native-babel-preset "0.72.4" + metro-resolver "0.72.4" + metro-runtime "0.72.4" + metro-source-map "0.72.4" + metro-symbolicate "0.72.4" + metro-transform-plugins "0.72.4" + metro-transform-worker "0.72.4" mime-types "^2.1.27" node-fetch "^2.2.0" nullthrows "^1.1.1" @@ -4606,25 +4357,6 @@ metro@0.70.3, metro@^0.70.1: ws "^7.5.1" yargs "^15.3.1" 
-micromatch@^3.1.10: - version "3.1.10" - resolved "https://registry.yarnpkg.com/micromatch/-/micromatch-3.1.10.tgz#70859bc95c9840952f359a068a3fc49f9ecfac23" - integrity sha512-MWikgl9n9M3w+bpsY3He8L+w9eF9338xRl8IAO5viDizwSzziFEyUzo2xrrloB64ADbTf8uA8vRqqttDTOmccg== - dependencies: - arr-diff "^4.0.0" - array-unique "^0.3.2" - braces "^2.3.1" - define-property "^2.0.2" - extend-shallow "^3.0.2" - extglob "^2.0.4" - fragment-cache "^0.2.1" - kind-of "^6.0.2" - nanomatch "^1.2.9" - object.pick "^1.3.0" - regex-not "^1.0.0" - snapdragon "^0.8.1" - to-regex "^3.0.2" - micromatch@^4.0.4: version "4.0.5" resolved "https://registry.yarnpkg.com/micromatch/-/micromatch-4.0.5.tgz#bc8999a7cbbf77cdc89f132f6e467051b49090c6" @@ -4674,19 +4406,11 @@ minimatch@^5.0.1: dependencies: brace-expansion "^2.0.1" -minimist@^1.2.0, minimist@^1.2.6: +minimist@^1.2.6: version "1.2.6" resolved "https://registry.yarnpkg.com/minimist/-/minimist-1.2.6.tgz#8637a5b759ea0d6e98702cfb3a9283323c93af44" integrity sha512-Jsjnk4bw3YJqYzbdyBiNsPWHPfO++UGG749Cxs6peCu5Xg4nrena6OVxOYxrQTqww0Jmwt+Ref8rggumkTLz9Q== -mixin-deep@^1.2.0: - version "1.3.2" - resolved "https://registry.yarnpkg.com/mixin-deep/-/mixin-deep-1.3.2.tgz#1120b43dc359a785dce65b55b82e257ccf479566" - integrity sha512-WRoDn//mXBiJ1H40rqa3vH0toePwSsGb45iInWlTySa+Uu4k3tYUSxa2v1KqAiLtvlrSzaExqS1gtk96A9zvEA== - dependencies: - for-in "^1.0.2" - is-extendable "^1.0.1" - mkdirp@^0.5.1, mkdirp@~0.5.1: version "0.5.6" resolved "https://registry.yarnpkg.com/mkdirp/-/mkdirp-0.5.6.tgz#7def03d2432dcae4ba1d611445c48396062255f6" @@ -4694,6 +4418,11 @@ mkdirp@^0.5.1, mkdirp@~0.5.1: dependencies: minimist "^1.2.6" +mkdirp@^1.0.4: + version "1.0.4" + resolved "https://registry.yarnpkg.com/mkdirp/-/mkdirp-1.0.4.tgz#3eb5ed62622756d79a5f0e2a221dfebad75c2f7e" + integrity sha512-vVqVZQyf3WLx2Shd0qJ9xuvqgAyKPLAiqITEtqW0oIUjzo3PePDd6fW9iFz30ef7Ysp/oiWqbhszeGWW2T6Gzw== + moment@^2.19.3: version "2.29.4" resolved "https://registry.yarnpkg.com/moment/-/moment-2.29.4.tgz#3dbe052889fe7c1b2ed966fcb3a77328964ef108" @@ -4709,7 +4438,7 @@ ms@2.1.2: resolved "https://registry.yarnpkg.com/ms/-/ms-2.1.2.tgz#d09d1f357b443f493382a8eb3ccd183872ae6009" integrity sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w== -ms@2.1.3: +ms@2.1.3, ms@^2.1.3: version "2.1.3" resolved "https://registry.yarnpkg.com/ms/-/ms-2.1.3.tgz#574c8138ce1d2b5861f0b44579dbadd60c6615b2" integrity sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA== @@ -4741,23 +4470,6 @@ nan@^2.14.0: resolved "https://registry.yarnpkg.com/nan/-/nan-2.17.0.tgz#c0150a2368a182f033e9aa5195ec76ea41a199cb" integrity sha512-2ZTgtl0nJsO0KQCjEpxcIr5D+Yv90plTitZt9JBfQvVJDS5seMl3FOvsh3+9CoYWXf/1l5OaZzzF6nDm4cagaQ== -nanomatch@^1.2.9: - version "1.2.13" - resolved "https://registry.yarnpkg.com/nanomatch/-/nanomatch-1.2.13.tgz#b87a8aa4fc0de8fe6be88895b38983ff265bd119" - integrity sha512-fpoe2T0RbHwBTBUOftAfBPaDEi06ufaUai0mE6Yn1kacc3SnTErfb/h+X94VXzI64rKFHYImXSvdwGGCmwOqCA== - dependencies: - arr-diff "^4.0.0" - array-unique "^0.3.2" - define-property "^2.0.2" - extend-shallow "^3.0.2" - fragment-cache "^0.2.1" - is-windows "^1.0.2" - kind-of "^6.0.2" - object.pick "^1.3.0" - regex-not "^1.0.0" - snapdragon "^0.8.1" - to-regex "^3.0.1" - natural-compare@^1.4.0: version "1.4.0" resolved "https://registry.yarnpkg.com/natural-compare/-/natural-compare-1.4.0.tgz#4abebfeed7541f2c27acfb29bdbbd15c8d5ba4f7" @@ -4860,37 +4572,21 @@ nullthrows@^1.1.1: resolved 
"https://registry.yarnpkg.com/nullthrows/-/nullthrows-1.1.1.tgz#7818258843856ae971eae4208ad7d7eb19a431b1" integrity sha512-2vPPEi+Z7WqML2jZYddDIfy5Dqb0r2fze2zTxNNknZaFpVHU3mFB3R+DWeJWGVx0ecvttSGlJTI+WG+8Z4cDWw== -ob1@0.70.3: - version "0.70.3" - resolved "https://registry.yarnpkg.com/ob1/-/ob1-0.70.3.tgz#f48cd5a5abf54b0c423b1b06b6d4ff4d049816cb" - integrity sha512-Vy9GGhuXgDRY01QA6kdhToPd8AkLdLpX9GjH5kpqluVqTu70mgOm7tpGoJDZGaNbr9nJlJgnipqHJQRPORixIQ== +ob1@0.72.4: + version "0.72.4" + resolved "https://registry.yarnpkg.com/ob1/-/ob1-0.72.4.tgz#d2ddedb09fb258d69490e8809157518a62b75506" + integrity sha512-/iPJKpXpVEZS0subUvjew4ept5LTBxj1hD20A4mAj9CJkGGPgvbBlfYtFEBubBkk4dv4Ef5lajsnRBYPxF74cQ== object-assign@^4.1.0, object-assign@^4.1.1: version "4.1.1" resolved "https://registry.yarnpkg.com/object-assign/-/object-assign-4.1.1.tgz#2109adc7965887cfc05cbbd442cac8bfbb360863" integrity sha512-rJgTQnkUnH1sFw8yT6VSU3zD3sWmu6sZhIseY8VX+GRu3P6F7Fu+JNDoXfklElbLJSnc3FUQHVe4cU5hj+BcUg== -object-copy@^0.1.0: - version "0.1.0" - resolved "https://registry.yarnpkg.com/object-copy/-/object-copy-0.1.0.tgz#7e7d858b781bd7c991a41ba975ed3812754e998c" - integrity sha512-79LYn6VAb63zgtmAteVOWo9Vdj71ZVBy3Pbse+VqxDpEP83XuujMrGqHIwAXJ5I/aM0zU7dIyIAhifVTPrNItQ== - dependencies: - copy-descriptor "^0.1.0" - define-property "^0.2.5" - kind-of "^3.0.3" - object-keys@^1.1.1: version "1.1.1" resolved "https://registry.yarnpkg.com/object-keys/-/object-keys-1.1.1.tgz#1c47f272df277f3b1daf061677d9c82e2322c60e" integrity sha512-NuAESUOUMrlIXOfHKzD6bpPu3tYt3xvjNdRIQ+FeT0lNb4K8WR70CaDxhuNguS2XG+GjkyMwOzsN5ZktImfhLA== -object-visit@^1.0.0: - version "1.0.1" - resolved "https://registry.yarnpkg.com/object-visit/-/object-visit-1.0.1.tgz#f79c4493af0c5377b59fe39d395e41042dd045bb" - integrity sha512-GBaMwwAVK9qbQN3Scdo0OyvgPW7l3lnaVMj84uTOZlswkX0KpF6fyDBJhtTthf7pymztoN36/KEr1DyhF96zEA== - dependencies: - isobject "^3.0.0" - object.assign@^4.1.0: version "4.1.2" resolved "https://registry.yarnpkg.com/object.assign/-/object.assign-4.1.2.tgz#0ed54a342eceb37b38ff76eb831a0e788cb63940" @@ -4901,13 +4597,6 @@ object.assign@^4.1.0: has-symbols "^1.0.1" object-keys "^1.1.1" -object.pick@^1.3.0: - version "1.3.0" - resolved "https://registry.yarnpkg.com/object.pick/-/object.pick-1.3.0.tgz#87a10ac4c1694bd2e1cbf53591a66141fb5dd747" - integrity sha512-tqa/UMy/CCoYmj+H5qc07qvSL9dqcs/WZENZ1JbtWBlATP+iVOe778gE6MSijnyCnORzDuX6hU+LA4SZ09YjFQ== - dependencies: - isobject "^3.0.1" - on-finished@2.4.1: version "2.4.1" resolved "https://registry.yarnpkg.com/on-finished/-/on-finished-2.4.1.tgz#58c8c44116e54845ad57f14ab10b03533184ac3f" @@ -5036,11 +4725,6 @@ parseurl@~1.3.3: resolved "https://registry.yarnpkg.com/parseurl/-/parseurl-1.3.3.tgz#9da19e7bee8d12dff0513ed5b76957793bc2e8d4" integrity sha512-CiyeOxFT/JZyN5m0z9PfXw4SCBJ6Sygz1Dpl0wqjlhDEGGBP1GnsUVEL0p63hoG1fcj3fHynXi9NYO4nWOL+qQ== -pascalcase@^0.1.1: - version "0.1.1" - resolved "https://registry.yarnpkg.com/pascalcase/-/pascalcase-0.1.1.tgz#b363e55e8006ca6fe21784d2db22bd15d7917f14" - integrity sha512-XHXfu/yOQRy9vYOtUDVMN60OEJjW013GoObG1o+xwQTpB9eYJX/BjXMsdW13ZDPruFhYYn0AG22w0xgQMwl3Nw== - path-exists@^3.0.0: version "3.0.0" resolved "https://registry.yarnpkg.com/path-exists/-/path-exists-3.0.0.tgz#ce0ebeaa5f78cb18925ea7d810d7b59b010fd515" @@ -5112,18 +4796,14 @@ pkg-up@^3.1.0: dependencies: find-up "^3.0.0" -plist@^3.0.2: - version "3.0.5" - resolved "https://registry.yarnpkg.com/plist/-/plist-3.0.5.tgz#2cbeb52d10e3cdccccf0c11a63a85d830970a987" - integrity 
sha512-83vX4eYdQp3vP9SxuYgEM/G/pJQqLUz/V/xzPrzruLs7fz7jxGQ1msZ/mg1nwZxUSuOp4sb+/bEIbRrbzZRxDA== +plist@^3.0.5: + version "3.1.0" + resolved "https://registry.yarnpkg.com/plist/-/plist-3.1.0.tgz#797a516a93e62f5bde55e0b9cc9c967f860893c9" + integrity sha512-uysumyrvkUX0rX/dEVqt8gC3sTBzd4zoWfLeS29nb53imdaXVvLINYXTI2GNqzaMuvacNx4uJQ8+b3zXR0pkgQ== dependencies: + "@xmldom/xmldom" "^0.8.8" base64-js "^1.5.1" - xmlbuilder "^9.0.7" - -posix-character-classes@^0.1.0: - version "0.1.1" - resolved "https://registry.yarnpkg.com/posix-character-classes/-/posix-character-classes-0.1.1.tgz#01eac0fe3b5af71a2a6c02feabb8c1fef7e00eab" - integrity sha512-xTgYBc3fuo7Yt7JbiuFxSYGToMoz8fLoE6TC9Wx1P/u+LfeThMOAqmuyECnlBaaJb+u1m9hHiXUEtwW4OzfUJg== + xmlbuilder "^15.1.1" pretty-format@^26.5.2, pretty-format@^26.6.2: version "26.6.2" @@ -5154,7 +4834,7 @@ promise-polyfill@^6.0.1: resolved "https://registry.yarnpkg.com/promise-polyfill/-/promise-polyfill-6.1.0.tgz#dfa96943ea9c121fca4de9b5868cb39d3472e057" integrity sha512-g0LWaH0gFsxovsU7R5LrrhHhWAWiHRnh1GPrhXnPgYsDkIqjRYUYSZEsej/wtleDrz5xVSIDbeKfidztp2XHFQ== -promise@^8.2.0: +promise@^8.3.0: version "8.3.0" resolved "https://registry.yarnpkg.com/promise/-/promise-8.3.0.tgz#8cb333d1edeb61ef23869fbb8a4ea0279ab60e0a" integrity sha512-rZPNPKTOYVNEEKFaq1HqTgOwZD+4/YHS5ukLzQCypkj+OkYx7iv0mA91lJlpPPZ8vMau3IIGj5Qlwrx+8iiSmg== @@ -5206,10 +4886,10 @@ range-parser@~1.2.1: resolved "https://registry.yarnpkg.com/range-parser/-/range-parser-1.2.1.tgz#3cf37023d199e1c24d1a55b84800c2f3e6468031" integrity sha512-Hrgsx+orqoygnmhFbKaHE6c296J+HTAQXoxEF6gNupROmmGJRoyzfG3ccAveqCBrwr/2yxQ5BVd/GTl5agOwSg== -react-devtools-core@4.24.0: - version "4.24.0" - resolved "https://registry.yarnpkg.com/react-devtools-core/-/react-devtools-core-4.24.0.tgz#7daa196bdc64f3626b3f54f2ff2b96f7c4fdf017" - integrity sha512-Rw7FzYOOzcfyUPaAm9P3g0tFdGqGq2LLiAI+wjYcp6CsF3DeeMrRS3HZAho4s273C29G/DJhx0e8BpRE/QZNGg== +react-devtools-core@4.27.7: + version "4.27.7" + resolved "https://registry.yarnpkg.com/react-devtools-core/-/react-devtools-core-4.27.7.tgz#458a6541483078d60a036c75bf88f54c478086ec" + integrity sha512-12N0HrhCPbD76Z7SkyJdGdXdPGouUsgV6tlEsbSpAnLDO06tjXZP+irht4wPdYwJAJRQ85DxL48eQoz7UmrSuQ== dependencies: shell-quote "^1.6.1" ws "^7" @@ -5224,14 +4904,14 @@ react-is@^17.0.1: resolved "https://registry.yarnpkg.com/react-is/-/react-is-17.0.2.tgz#e691d4a8e9c789365655539ab372762b0efb54f0" integrity sha512-w2GsyukL62IJnlaff/nRegPQR94C/XXamvMWmSHRJ4y7Ts/4ocGRmTHvOs8PSE6pB3dWOrD/nueuU5sduBsQ4w== -react-native-codegen@^0.69.2: - version "0.69.2" - resolved "https://registry.yarnpkg.com/react-native-codegen/-/react-native-codegen-0.69.2.tgz#e33ac3b1486de59ddae687b731ddbfcef8af0e4e" - integrity sha512-yPcgMHD4mqLbckqnWjFBaxomDnBREfRjDi2G/WxNyPBQLD+PXUEmZTkDx6QoOXN+Bl2SkpnNOSsLE2+/RUHoPw== +react-native-codegen@^0.70.7: + version "0.70.7" + resolved "https://registry.yarnpkg.com/react-native-codegen/-/react-native-codegen-0.70.7.tgz#8f6b47a88740ae703209d57b7605538d86dacfa6" + integrity sha512-qXE8Jrhc9BmxDAnCmrHFDLJrzgjsE/mH57dtC4IO7K76AwagdXNCMRp5SA8XdHJzvvHWRaghpiFHEMl9TtOBcQ== dependencies: "@babel/parser" "^7.14.0" flow-parser "^0.121.0" - jscodeshift "^0.13.1" + jscodeshift "^0.14.0" nullthrows "^1.1.1" react-native-fs@^2.20.0: @@ -5242,20 +4922,20 @@ react-native-fs@^2.20.0: base-64 "^0.1.0" utf8 "^3.0.0" -react-native-gradle-plugin@^0.0.7: - version "0.0.7" - resolved "https://registry.yarnpkg.com/react-native-gradle-plugin/-/react-native-gradle-plugin-0.0.7.tgz#96602f909745239deab7b589443f14fce5da2056" - 
integrity sha512-+4JpbIx42zGTONhBTIXSyfyHICHC29VTvhkkoUOJAh/XHPEixpuBduYgf6Y4y9wsN1ARlQhBBoptTvXvAFQf5g== +react-native-gradle-plugin@^0.70.3: + version "0.70.3" + resolved "https://registry.yarnpkg.com/react-native-gradle-plugin/-/react-native-gradle-plugin-0.70.3.tgz#cbcf0619cbfbddaa9128701aa2d7b4145f9c4fc8" + integrity sha512-oOanj84fJEXUg9FoEAQomA8ISG+DVIrTZ3qF7m69VQUJyOGYyDZmPqKcjvRku4KXlEH6hWO9i4ACLzNBh8gC0A== -react-native@^0.69.1: - version "0.69.8" - resolved "https://registry.yarnpkg.com/react-native/-/react-native-0.69.8.tgz#3d9b47c42c100455850b47859ff12b66c5ffb689" - integrity sha512-sHaIypVD9avdHQpvmVXz4vPy6IECKNfWCz2E5DmiC6NWKxGd+tkNMBMIOl7dATdpAiD19FUiu3EyBVdO1vqTUw== +react-native@^0.70.15: + version "0.70.15" + resolved "https://registry.yarnpkg.com/react-native/-/react-native-0.70.15.tgz#65f2c5c399ff8e2a892cef9b094cc0888653a874" + integrity sha512-pm2ZPpA+m0Kl0THAy2fptnp7B9+QPexpfad9fSXfqjPufrXG2alwW8kYCn2EO5ZUX6bomZjFEswz6RzdRN/p9A== dependencies: "@jest/create-cache-key-function" "^27.0.1" - "@react-native-community/cli" "^8.0.4" - "@react-native-community/cli-platform-android" "^8.0.4" - "@react-native-community/cli-platform-ios" "^8.0.4" + "@react-native-community/cli" "9.3.5" + "@react-native-community/cli-platform-android" "9.3.4" + "@react-native-community/cli-platform-ios" "9.3.0" "@react-native/assets" "1.0.0" "@react-native/normalize-color" "2.0.0" "@react-native/polyfills" "2.0.0" @@ -5263,24 +4943,23 @@ react-native@^0.69.1: anser "^1.4.9" base64-js "^1.1.2" event-target-shim "^5.0.1" - hermes-engine "~0.11.0" invariant "^2.2.4" jsc-android "^250230.2.1" memoize-one "^5.0.0" - metro-react-native-babel-transformer "0.70.3" - metro-runtime "0.70.3" - metro-source-map "0.70.3" + metro-react-native-babel-transformer "0.72.4" + metro-runtime "0.72.4" + metro-source-map "0.72.4" mkdirp "^0.5.1" nullthrows "^1.1.1" pretty-format "^26.5.2" - promise "^8.2.0" - react-devtools-core "4.24.0" - react-native-codegen "^0.69.2" - react-native-gradle-plugin "^0.0.7" + promise "^8.3.0" + react-devtools-core "4.27.7" + react-native-codegen "^0.70.7" + react-native-gradle-plugin "^0.70.3" react-refresh "^0.4.0" - react-shallow-renderer "16.15.0" + react-shallow-renderer "^16.15.0" regenerator-runtime "^0.13.2" - scheduler "^0.21.0" + scheduler "^0.22.0" stacktrace-parser "^0.1.3" use-sync-external-store "^1.0.0" whatwg-fetch "^3.0.0" @@ -5291,7 +4970,7 @@ react-refresh@^0.4.0: resolved "https://registry.yarnpkg.com/react-refresh/-/react-refresh-0.4.3.tgz#966f1750c191672e76e16c2efa569150cc73ab53" integrity sha512-Hwln1VNuGl/6bVwnd0Xdn1e84gT/8T9aYNL+HAKDArLCS7LWjwr7StE30IEYbIkx0Vi3vs+coQxe+SQDbGbbpA== -react-shallow-renderer@16.15.0: +react-shallow-renderer@^16.15.0: version "16.15.0" resolved "https://registry.yarnpkg.com/react-shallow-renderer/-/react-shallow-renderer-16.15.0.tgz#48fb2cf9b23d23cde96708fe5273a7d3446f4457" integrity sha512-oScf2FqQ9LFVQgA73vr86xl2NaOIX73rh+YFqcOp68CWj56tSfgtGKrEbyhCj0rSijyG9M1CYprTh39fBi5hzA== @@ -5346,12 +5025,12 @@ readline@^1.3.0: resolved "https://registry.yarnpkg.com/readline/-/readline-1.3.0.tgz#c580d77ef2cfc8752b132498060dc9793a7ac01c" integrity sha512-k2d6ACCkiNYz222Fs/iNze30rRJ1iIicW7JuX/7/cozvih6YCkFZH+J6mAFDVgv0dRBaAyr4jDqC95R2y4IADg== -recast@^0.20.4: - version "0.20.5" - resolved "https://registry.yarnpkg.com/recast/-/recast-0.20.5.tgz#8e2c6c96827a1b339c634dd232957d230553ceae" - integrity sha512-E5qICoPoNL4yU0H0NoBDntNB0Q5oMSNh9usFctYniLBluTthi3RsQVBXIJNbApOlvSwW/RGxIuokPcAc59J5fQ== +recast@^0.21.0: + version "0.21.5" + resolved 
"https://registry.yarnpkg.com/recast/-/recast-0.21.5.tgz#e8cd22bb51bcd6130e54f87955d33a2b2e57b495" + integrity sha512-hjMmLaUXAm1hIuTqOdeYObMslq/q+Xff6QE3Y2P+uoHAg2nmVlLBps2hzh1UJDdMtDTMXOFewK6ky51JQIeECg== dependencies: - ast-types "0.14.2" + ast-types "0.15.2" esprima "~4.0.0" source-map "~0.6.1" tslib "^2.0.1" @@ -5373,21 +5052,6 @@ regenerator-runtime@^0.13.2, regenerator-runtime@^0.13.4: resolved "https://registry.yarnpkg.com/regenerator-runtime/-/regenerator-runtime-0.13.9.tgz#8925742a98ffd90814988d7566ad30ca3b263b52" integrity sha512-p3VT+cOEgxFsRRA9X4lkI1E+k2/CtnKtU4gcxyaCUreilL/vqI6CdZ3wxVUx3UOUg+gnUOQQcRI7BmSI656MYA== -regenerator-transform@^0.15.0: - version "0.15.0" - resolved "https://registry.yarnpkg.com/regenerator-transform/-/regenerator-transform-0.15.0.tgz#cbd9ead5d77fae1a48d957cf889ad0586adb6537" - integrity sha512-LsrGtPmbYg19bcPHwdtmXwbW+TqNvtY4riE3P83foeHRroMbH6/2ddFBfab3t7kbzc7v7p4wbkIecHImqt0QNg== - dependencies: - "@babel/runtime" "^7.8.4" - -regex-not@^1.0.0, regex-not@^1.0.2: - version "1.0.2" - resolved "https://registry.yarnpkg.com/regex-not/-/regex-not-1.0.2.tgz#1f4ece27e00b0b65e0247a6810e6a85d83a5752c" - integrity sha512-J6SDjUgDxQj5NusnOtdFxDwN/+HWykR8GELwctJ7mdqhcyy1xEc4SRFHUXvxTp661YaVKAjfRLZ9cCqS6tn32A== - dependencies: - extend-shallow "^3.0.2" - safe-regex "^1.1.0" - regexpu-core@^5.0.1: version "5.0.1" resolved "https://registry.yarnpkg.com/regexpu-core/-/regexpu-core-5.0.1.tgz#c531122a7840de743dcf9c83e923b5560323ced3" @@ -5424,16 +5088,6 @@ regjsparser@^0.8.2: dependencies: jsesc "~0.5.0" -repeat-element@^1.1.2: - version "1.1.4" - resolved "https://registry.yarnpkg.com/repeat-element/-/repeat-element-1.1.4.tgz#be681520847ab58c7568ac75fbfad28ed42d39e9" - integrity sha512-LFiNfRcSu7KK3evMyYOuCzv3L10TW7yC1G2/+StMjK8Y6Vqd2MG7r/Qjw4ghtuCOjFvlnms/iMmLqpvW/ES/WQ== - -repeat-string@^1.6.1: - version "1.6.1" - resolved "https://registry.yarnpkg.com/repeat-string/-/repeat-string-1.6.1.tgz#8dcae470e1c88abc2d600fff4a776286da75e637" - integrity sha512-PV0dzCYDNfRi1jCDbJzpW7jNNDRuCOG/jI5ctQcGKt/clZD+YcPS3yIlWuTJMmESC8aevCFmWJy5wjAFgNqN6w== - require-directory@^2.1.1: version "2.1.1" resolved "https://registry.yarnpkg.com/require-directory/-/require-directory-2.1.1.tgz#8c64ad5fd30dab1c976e2344ffe7f792a6a6df42" @@ -5471,11 +5125,6 @@ resolve-from@^5.0.0: resolved "https://registry.yarnpkg.com/resolve-from/-/resolve-from-5.0.0.tgz#c35225843df8f776df21c57557bc087e9dfdfc69" integrity sha512-qYg9KP24dD5qka9J47d0aVky0N+b4fTU89LN9iDnjB5waksiC49rvMB0PrUJQGoTmH50XPiqOvAjDfaijGxYZw== -resolve-url@^0.2.1: - version "0.2.1" - resolved "https://registry.yarnpkg.com/resolve-url/-/resolve-url-0.2.1.tgz#2c637fe77c893afd2a663fe21aa9080068e2052a" - integrity sha512-ZuF55hVUQaaczgOIwqWzkEcEidmlD/xl44x1UZnhOXcYuFN2S6+rcxpG+C1N3So0wvNI3DmJICUFfu2SxhBmvg== - resolve.exports@^2.0.0: version "2.0.2" resolved "https://registry.yarnpkg.com/resolve.exports/-/resolve.exports-2.0.2.tgz#f8c934b8e6a13f539e38b7098e2e36134f01e800" @@ -5507,11 +5156,6 @@ restore-cursor@^3.1.0: onetime "^5.1.0" signal-exit "^3.0.2" -ret@~0.1.10: - version "0.1.15" - resolved "https://registry.yarnpkg.com/ret/-/ret-0.1.15.tgz#b8a4825d5bdb1fc3f6f53c2bc33f81388681c7bc" - integrity sha512-TTlYpa+OL+vMMNG24xSlQGEJ3B/RzEfUlLct7b5G/ytav+wPrplCpVMFuwzXbkecJrb6IYo1iFb0S9v37754mg== - retry@^0.12.0: version "0.12.0" resolved "https://registry.yarnpkg.com/retry/-/retry-0.12.0.tgz#1b42a6266a21f07421d1b0b54b7dc167b01c013b" @@ -5558,13 +5202,6 @@ safe-json-stringify@~1: resolved 
"https://registry.yarnpkg.com/safe-json-stringify/-/safe-json-stringify-1.2.0.tgz#356e44bc98f1f93ce45df14bcd7c01cda86e0afd" integrity sha512-gH8eh2nZudPQO6TytOvbxnuhYBOvDBBLW52tz5q6X58lJcd/tkmqFR+5Z9adS8aJtURSXWThWy/xJtJwixErvg== -safe-regex@^1.1.0: - version "1.1.0" - resolved "https://registry.yarnpkg.com/safe-regex/-/safe-regex-1.1.0.tgz#40a3669f3b077d1e943d44629e157dd48023bf2e" - integrity sha512-aJXcif4xnaNUzvUuC5gcb46oTS7zvg4jpMTnuqtrEPlR3vFr4pxtdTwaF1Qs3Enjn9HK+ZlwQui+a7z0SywIzg== - dependencies: - ret "~0.1.10" - sanitize-filename@^1.6.1: version "1.6.3" resolved "https://registry.yarnpkg.com/sanitize-filename/-/sanitize-filename-1.6.3.tgz#755ebd752045931977e30b2025d340d7c9090378" @@ -5572,10 +5209,15 @@ sanitize-filename@^1.6.1: dependencies: truncate-utf8-bytes "^1.0.0" -scheduler@^0.21.0: - version "0.21.0" - resolved "https://registry.yarnpkg.com/scheduler/-/scheduler-0.21.0.tgz#6fd2532ff5a6d877b6edb12f00d8ab7e8f308820" - integrity sha512-1r87x5fz9MXqswA2ERLo0EbOAU74DpIUO090gIasYTqlVoJeMcl+Z1Rg7WHz+qtPujhS/hGIt9kxZOYBV3faRQ== +sax@>=0.6.0: + version "1.4.1" + resolved "https://registry.yarnpkg.com/sax/-/sax-1.4.1.tgz#44cc8988377f126304d3b3fc1010c733b929ef0f" + integrity sha512-+aWOz7yVScEGoKNd4PA10LZ8sk0A/z5+nXQG5giUO5rprX9jgYsTdov9qCchZiPIZezbZH+jRut8nPodFAX4Jg== + +scheduler@^0.22.0: + version "0.22.0" + resolved "https://registry.yarnpkg.com/scheduler/-/scheduler-0.22.0.tgz#83a5d63594edf074add9a7198b1bae76c3db01b8" + integrity sha512-6QAm1BgQI88NPYymgGQLCZgvep4FyePDWFpXVK+zNSUgHwlqpJy8VEh8Et0KxTACS4VWwMousBElAZOH9nkkoQ== dependencies: loose-envify "^1.1.0" @@ -5601,6 +5243,11 @@ semver@^7.0.0, semver@^7.3.5: dependencies: lru-cache "^6.0.0" +semver@^7.5.3: + version "7.6.3" + resolved "https://registry.yarnpkg.com/semver/-/semver-7.6.3.tgz#980f7b5550bc175fb4dc09403085627f9eb33143" + integrity sha512-oVekP1cKtI+CTDvHWYFUcMtsK/00wmAEfyqKfNdARm8u1wNVhSgaX7A8d4UuIlUI5e84iEwOhs7ZPYRmzU9U6A== + send@0.18.0: version "0.18.0" resolved "https://registry.yarnpkg.com/send/-/send-0.18.0.tgz#670167cc654b05f5aa4a767f9113bb371bc706be" @@ -5647,16 +5294,6 @@ set-blocking@^2.0.0: resolved "https://registry.yarnpkg.com/set-blocking/-/set-blocking-2.0.0.tgz#045f9782d011ae9a6803ddd382b24392b3d890f7" integrity sha512-KiKBS8AnWGEyLzofFfmvKwpdPzqiy16LvQfK3yv/fVH7Bj13/wl3JSR1J+rfgRE9q7xUJK4qvgS8raSOeLUehw== -set-value@^2.0.0, set-value@^2.0.1: - version "2.0.1" - resolved "https://registry.yarnpkg.com/set-value/-/set-value-2.0.1.tgz#a18d40530e6f07de4228c7defe4227af8cad005b" - integrity sha512-JxHc1weCN68wRY0fhCoXpyK55m/XPHafOmK4UWD7m2CI14GMcFypt4w/0+NV5f/ZMby2F6S2wwA7fgynh9gWSw== - dependencies: - extend-shallow "^2.0.1" - is-extendable "^0.1.1" - is-plain-object "^2.0.3" - split-string "^3.0.1" - setprototypeof@1.2.0: version "1.2.0" resolved "https://registry.yarnpkg.com/setprototypeof/-/setprototypeof-1.2.0.tgz#66c9a24a73f9fc28cbe66b09fed3d33dcaf1b424" @@ -5708,6 +5345,15 @@ signal-exit@^3.0.0, signal-exit@^3.0.2, signal-exit@^3.0.3, signal-exit@^3.0.7: resolved "https://registry.yarnpkg.com/signal-exit/-/signal-exit-3.0.7.tgz#a9a1767f8af84155114eaabd73f99273c8f59ad9" integrity sha512-wnD2ZE+l+SPC/uoS0vXeE9L1+0wuaMqKlfz9AMUo38JsyLSBWSFcHR1Rri62LZc12vLr1gb3jl7iwQhgwpAbGQ== +simple-plist@^1.1.0: + version "1.3.1" + resolved "https://registry.yarnpkg.com/simple-plist/-/simple-plist-1.3.1.tgz#16e1d8f62c6c9b691b8383127663d834112fb017" + integrity sha512-iMSw5i0XseMnrhtIzRb7XpQEXepa9xhWxGUojHBL43SIpQuDQkh3Wpy67ZbDzZVr6EKxvwVChnVpdl8hEVLDiw== + dependencies: + bplist-creator "0.1.0" + bplist-parser 
"0.3.1" + plist "^3.0.5" + sisteransi@^1.0.5: version "1.0.5" resolved "https://registry.yarnpkg.com/sisteransi/-/sisteransi-1.0.5.tgz#134d681297756437cc05ca01370d3a7a571075ed" @@ -5727,46 +5373,10 @@ slice-ansi@^2.0.0: astral-regex "^1.0.0" is-fullwidth-code-point "^2.0.0" -snapdragon-node@^2.0.1: - version "2.1.1" - resolved "https://registry.yarnpkg.com/snapdragon-node/-/snapdragon-node-2.1.1.tgz#6c175f86ff14bdb0724563e8f3c1b021a286853b" - integrity sha512-O27l4xaMYt/RSQ5TR3vpWCAB5Kb/czIcqUFOM/C4fYcLnbZUc1PkjTAMjof2pBWaSTwOUd6qUHcFGVGj7aIwnw== - dependencies: - define-property "^1.0.0" - isobject "^3.0.0" - snapdragon-util "^3.0.1" - -snapdragon-util@^3.0.1: - version "3.0.1" - resolved "https://registry.yarnpkg.com/snapdragon-util/-/snapdragon-util-3.0.1.tgz#f956479486f2acd79700693f6f7b805e45ab56e2" - integrity sha512-mbKkMdQKsjX4BAL4bRYTj21edOf8cN7XHdYUJEe+Zn99hVEYcMvKPct1IqNe7+AZPirn8BCDOQBHQZknqmKlZQ== - dependencies: - kind-of "^3.2.0" - -snapdragon@^0.8.1: - version "0.8.2" - resolved "https://registry.yarnpkg.com/snapdragon/-/snapdragon-0.8.2.tgz#64922e7c565b0e14204ba1aa7d6964278d25182d" - integrity sha512-FtyOnWN/wCHTVXOMwvSv26d+ko5vWlIDD6zoUJ7LW8vh+ZBC8QdljveRP+crNrtBwioEUWy/4dMtbBjA4ioNlg== - dependencies: - base "^0.11.1" - debug "^2.2.0" - define-property "^0.2.5" - extend-shallow "^2.0.1" - map-cache "^0.2.2" - source-map "^0.5.6" - source-map-resolve "^0.5.0" - use "^3.1.0" - -source-map-resolve@^0.5.0: - version "0.5.3" - resolved "https://registry.yarnpkg.com/source-map-resolve/-/source-map-resolve-0.5.3.tgz#190866bece7553e1f8f267a2ee82c606b5509a1a" - integrity sha512-Htz+RnsXWk5+P2slx5Jh3Q66vhQj1Cllm0zvnaY98+NFx+Dv2CF/f5O/t8x+KaNdrdIAsruNzoh/KpialbqAnw== - dependencies: - atob "^2.1.2" - decode-uri-component "^0.2.0" - resolve-url "^0.2.1" - source-map-url "^0.4.0" - urix "^0.1.0" +slugify@^1.6.6: + version "1.6.6" + resolved "https://registry.yarnpkg.com/slugify/-/slugify-1.6.6.tgz#2d4ac0eacb47add6af9e04d3be79319cbcc7924b" + integrity sha512-h+z7HKHYXj6wJU+AnS/+IH8Uh9fdcX1Lrhg1/VMdf9PwoBQXFcXiAdsy2tSK0P6gKwJLXp02r90ahUCqHk9rrw== source-map-support@0.5.13: version "0.5.13" @@ -5784,11 +5394,6 @@ source-map-support@^0.5.16: buffer-from "^1.0.0" source-map "^0.6.0" -source-map-url@^0.4.0: - version "0.4.1" - resolved "https://registry.yarnpkg.com/source-map-url/-/source-map-url-0.4.1.tgz#0af66605a745a5a2f91cf1bbf8a7afbc283dec56" - integrity sha512-cPiFOTLUKvJFIg4SKVScy4ilPPW6rFgMgfuZJPNoDuMs3nC1HbMUycBoJw77xFIp6z1UJQJOfx6C9GMH80DiTw== - source-map@^0.5.6: version "0.5.7" resolved "https://registry.yarnpkg.com/source-map/-/source-map-0.5.7.tgz#8a039d2d1021d22d1ea14c80d8ea468ba2ef3fcc" @@ -5804,13 +5409,6 @@ source-map@^0.7.3: resolved "https://registry.yarnpkg.com/source-map/-/source-map-0.7.4.tgz#a9bbe705c9d8846f4e08ff6765acf0f1b0898656" integrity sha512-l3BikUxvPOcn5E74dZiq5BGsTb5yEwhaTSzccU6t4sDOH8NWJCstKO5QT2CvtFoK6F0saL7p9xHAqHOlCPJygA== -split-string@^3.0.1, split-string@^3.0.2: - version "3.1.0" - resolved "https://registry.yarnpkg.com/split-string/-/split-string-3.1.0.tgz#7cb09dda3a86585705c64b39a6466038682e8fe2" - integrity sha512-NzNVhJDYpwceVVii8/Hu6DKfD2G+NrQHlS/V/qgv763EYudVwEcMQNxd2lh+0VrUByXN/oJkl5grOhYWvQUYiw== - dependencies: - extend-shallow "^3.0.0" - sprintf-js@~1.0.2: version "1.0.3" resolved "https://registry.yarnpkg.com/sprintf-js/-/sprintf-js-1.0.3.tgz#04e6926f662895354f3dd015203633b857297e2c" @@ -5835,14 +5433,6 @@ stacktrace-parser@^0.1.3: dependencies: type-fest "^0.7.1" -static-extend@^0.1.1: - version "0.1.2" - resolved 
"https://registry.yarnpkg.com/static-extend/-/static-extend-0.1.2.tgz#60809c39cbff55337226fd5e0b520f341f1fb5c6" - integrity sha512-72E9+uLc27Mt718pMHt9VMNiAL4LMsmDbBva8mxWUCkT07fSzEGMYUCk0XWY6lp0j6RBAG4cJ3mWuZv2OE3s0g== - dependencies: - define-property "^0.2.5" - object-copy "^0.1.0" - statuses@2.0.1: version "2.0.1" resolved "https://registry.yarnpkg.com/statuses/-/statuses-2.0.1.tgz#55cb000ccf1d48728bd23c685a063998cf1a1b63" @@ -5853,6 +5443,11 @@ statuses@~1.5.0: resolved "https://registry.yarnpkg.com/statuses/-/statuses-1.5.0.tgz#161c7dac177659fd9811f43771fa99381478628c" integrity sha512-OpZ3zP+jT1PI7I8nemJX4AKmAX070ZkYPVWV/AaKTJl+tXCTGyVdC1a4SL8RUQYEwk/f34ZX8UTykN68FwrqAA== +stream-buffers@2.2.x: + version "2.2.0" + resolved "https://registry.yarnpkg.com/stream-buffers/-/stream-buffers-2.2.0.tgz#91d5f5130d1cef96dcfa7f726945188741d09ee4" + integrity sha512-uyQK/mx5QjHun80FLJTfaWE7JtwfRMKBLkMne6udYOmvH0CawotVa7TfgYHzAnpphn4+TweIx1QKMnRIbipmUg== + stream-chain@^2.2.5: version "2.2.5" resolved "https://registry.yarnpkg.com/stream-chain/-/stream-chain-2.2.5.tgz#b30967e8f14ee033c5b9a19bbe8a2cba90ba0d09" @@ -6028,21 +5623,6 @@ to-fast-properties@^2.0.0: resolved "https://registry.yarnpkg.com/to-fast-properties/-/to-fast-properties-2.0.0.tgz#dc5e698cbd079265bc73e0377681a4e4e83f616e" integrity sha512-/OaKK0xYrs3DmxRYqL/yDc+FxFUVYhDlXMhRmv3z915w2HF1tnN1omB354j8VUGO/hbRzyD6Y3sA7v7GS/ceog== -to-object-path@^0.3.0: - version "0.3.0" - resolved "https://registry.yarnpkg.com/to-object-path/-/to-object-path-0.3.0.tgz#297588b7b0e7e0ac08e04e672f85c1f4999e17af" - integrity sha512-9mWHdnGRuh3onocaHzukyvCZhzvr6tiflAy/JRFXcJX0TjgfWA9pk9t8CMbzmBE4Jfw58pXbkngtBtqYxzNEyg== - dependencies: - kind-of "^3.0.2" - -to-regex-range@^2.1.0: - version "2.1.1" - resolved "https://registry.yarnpkg.com/to-regex-range/-/to-regex-range-2.1.1.tgz#7c80c17b9dfebe599e27367e0d4dd5590141db38" - integrity sha512-ZZWNfCjUokXXDGXFpZehJIkZqq91BcULFq/Pi7M5i4JnxXdhMKAK682z8bCW3o8Hj1wuuzoKcW3DfVzaP6VuNg== - dependencies: - is-number "^3.0.0" - repeat-string "^1.6.1" - to-regex-range@^5.0.1: version "5.0.1" resolved "https://registry.yarnpkg.com/to-regex-range/-/to-regex-range-5.0.1.tgz#1648c44aae7c8d988a326018ed72f5b4dd0392e4" @@ -6050,16 +5630,6 @@ to-regex-range@^5.0.1: dependencies: is-number "^7.0.0" -to-regex@^3.0.1, to-regex@^3.0.2: - version "3.0.2" - resolved "https://registry.yarnpkg.com/to-regex/-/to-regex-3.0.2.tgz#13cfdd9b336552f30b51f33a8ae1b42a7a7599ce" - integrity sha512-FWtleNAtZ/Ki2qtqej2CXTOayOH9bHDQF+Q48VpWyDXjbYxA4Yz8iDB31zXOBUlOHHKidDbqGVrTUvQMPmBGBw== - dependencies: - define-property "^2.0.2" - extend-shallow "^3.0.2" - regex-not "^1.0.2" - safe-regex "^1.1.0" - toidentifier@1.0.1: version "1.0.1" resolved "https://registry.yarnpkg.com/toidentifier/-/toidentifier-1.0.1.tgz#3be34321a88a820ed1bd80dfaa33e479fbb8dd35" @@ -6141,16 +5711,6 @@ unicode-property-aliases-ecmascript@^2.0.0: resolved "https://registry.yarnpkg.com/unicode-property-aliases-ecmascript/-/unicode-property-aliases-ecmascript-2.0.0.tgz#0a36cb9a585c4f6abd51ad1deddb285c165297c8" integrity sha512-5Zfuy9q/DFr4tfO7ZPeVXb1aPoeQSdeFMLpYuFebehDAhbuevLs5yxSZmIFN1tP5F9Wl4IpJrYojg85/zgyZHQ== -union-value@^1.0.0: - version "1.0.1" - resolved "https://registry.yarnpkg.com/union-value/-/union-value-1.0.1.tgz#0b6fe7b835aecda61c6ea4d4f02c14221e109847" - integrity sha512-tJfXmxMeWYnczCVs7XAEvIV7ieppALdyepWMkHkwciRpZraG/xwT+s2JN8+pr1+8jCRf80FFzvr+MpQeeoF4Xg== - dependencies: - arr-union "^3.1.0" - get-value "^2.0.6" - is-extendable "^0.1.1" - set-value "^2.0.1" - 
universalify@^0.1.0: version "0.1.2" resolved "https://registry.yarnpkg.com/universalify/-/universalify-0.1.2.tgz#b646f69be3942dabcecc9d6639c80dc105efaa66" @@ -6161,14 +5721,6 @@ unpipe@~1.0.0: resolved "https://registry.yarnpkg.com/unpipe/-/unpipe-1.0.0.tgz#b2bf4ee8514aae6165b4817829d21b2ef49904ec" integrity sha512-pjy2bYhSsufwWlKwPc+l3cN7+wuJlK6uz0YdJEOlQDbl6jo/YlPi4mb8agUkVC8BF7V8NuzeyPNqRksA3hztKQ== -unset-value@^1.0.0: - version "1.0.0" - resolved "https://registry.yarnpkg.com/unset-value/-/unset-value-1.0.0.tgz#8376873f7d2335179ffb1e6fc3a8ed0dfc8ab559" - integrity sha512-PcA2tsuGSF9cnySLHTLSh2qrQiJ70mn+r+Glzxv2TWZblxsxCC52BDlZoPCsz7STd9pN7EZetkWZBAvk4cgZdQ== - dependencies: - has-value "^0.3.1" - isobject "^3.0.0" - update-browserslist-db@^1.0.10: version "1.0.10" resolved "https://registry.yarnpkg.com/update-browserslist-db/-/update-browserslist-db-1.0.10.tgz#0f54b876545726f17d00cd9a2561e6dade943ff3" @@ -6184,21 +5736,11 @@ uri-js@^4.2.2: dependencies: punycode "^2.1.0" -urix@^0.1.0: - version "0.1.0" - resolved "https://registry.yarnpkg.com/urix/-/urix-0.1.0.tgz#da937f7a62e21fec1fd18d49b35c2935067a6c72" - integrity sha512-Am1ousAhSLBeB9cG/7k7r2R0zj50uDRlZHPGbazid5s9rlF1F/QKYObEKSIunSjIOkJZqwRRLpvewjEkM7pSqg== - use-sync-external-store@^1.0.0: version "1.2.0" resolved "https://registry.yarnpkg.com/use-sync-external-store/-/use-sync-external-store-1.2.0.tgz#7dbefd6ef3fe4e767a0cf5d7287aacfb5846928a" integrity sha512-eEgnFxGQ1Ife9bzYs6VLi8/4X6CObHMw9Qr9tPY43iKwsPw8xE8+EFsf/2cFZ5S3esXgpWgtSCtLNS41F+sKPA== -use@^3.1.0: - version "3.1.1" - resolved "https://registry.yarnpkg.com/use/-/use-3.1.1.tgz#d50c8cac79a19fbc20f2911f56eb973f4e10070f" - integrity sha512-cwESVXlO3url9YWlFW/TA9cshCEhtu7IKJ/p5soJ/gGpj7vbvFrAY/eIioQ6Dw23KjZhYgiIo8HOs1nQ2vr/oQ== - utf8-byte-length@^1.0.1: version "1.0.4" resolved "https://registry.yarnpkg.com/utf8-byte-length/-/utf8-byte-length-1.0.4.tgz#f45f150c4c66eee968186505ab93fcbb8ad6bf61" @@ -6224,6 +5766,16 @@ uuid@^3.0.1: resolved "https://registry.yarnpkg.com/uuid/-/uuid-3.4.0.tgz#b23e4358afa8a202fe7a100af1f5f883f02007ee" integrity sha512-HjSDRw6gZE5JMggctHBcjVak08+KEVhSIiDzFnT9S9aegmp85S/bReBVTb4QTFaRNptJ9kuYaNhnbNEOkbKb/A== +uuid@^7.0.3: + version "7.0.3" + resolved "https://registry.yarnpkg.com/uuid/-/uuid-7.0.3.tgz#c5c9f2c8cf25dc0a372c4df1441c41f5bd0c680b" + integrity sha512-DPSke0pXhTZgoF/d+WSt2QaKMCFSfx7QegxEWT+JOuHF5aWrKEn0G+ztjuJg/gG8/ItK+rbPCD/yNv8yyih6Cg== + +uuid@^8.3.2: + version "8.3.2" + resolved "https://registry.yarnpkg.com/uuid/-/uuid-8.3.2.tgz#80d5b5ced271bb9af6c445f21a1a04c606cefbe2" + integrity sha512-+NYs2QeMWy+GWFOEm9xnn6HCDp0l7QBD7ml8zLUmJ+93Q5NF0NocErnwkTkXVFNiX3/fpC6afS8Dhb/gz7R7eg== + v8-to-istanbul@^9.0.1: version "9.1.0" resolved "https://registry.yarnpkg.com/v8-to-istanbul/-/v8-to-istanbul-9.1.0.tgz#1b83ed4e397f58c85c266a570fc2558b5feb9265" @@ -6351,10 +5903,41 @@ ws@^7.0.0: resolved "https://registry.yarnpkg.com/ws/-/ws-7.5.9.tgz#54fa7db29f4c7cec68b1ddd3a89de099942bb591" integrity sha512-F+P9Jil7UiSKSkppIiD94dN07AwvFixvLIj1Og1Rl9GGMuNipJnV9JzjD6XuqmAeiswGvUmNLjr5cFuXwNS77Q== -xmlbuilder@^9.0.7: - version "9.0.7" - resolved "https://registry.yarnpkg.com/xmlbuilder/-/xmlbuilder-9.0.7.tgz#132ee63d2ec5565c557e20f4c22df9aca686b10d" - integrity sha512-7YXTQc3P2l9+0rjaUbLwMKRhtmwg1M1eDf6nag7urC7pIPYLD9W/jmzQ4ptRSUbodw5S0jfoGTflLemQibSpeQ== +xcode@^3.0.1: + version "3.0.1" + resolved "https://registry.yarnpkg.com/xcode/-/xcode-3.0.1.tgz#3efb62aac641ab2c702458f9a0302696146aa53c" + integrity 
sha512-kCz5k7J7XbJtjABOvkc5lJmkiDh8VhjVCGNiqdKCscmVpdVUpEAyXv1xmCLkQJ5dsHqx3IPO4XW+NTDhU/fatA== + dependencies: + simple-plist "^1.1.0" + uuid "^7.0.3" + +xml2js@0.6.0: + version "0.6.0" + resolved "https://registry.yarnpkg.com/xml2js/-/xml2js-0.6.0.tgz#07afc447a97d2bd6507a1f76eeadddb09f7a8282" + integrity sha512-eLTh0kA8uHceqesPqSE+VvO1CDDJWMwlQfB6LuN6T8w6MaDJ8Txm8P7s5cHD0miF0V+GGTZrDQfxPZQVsur33w== + dependencies: + sax ">=0.6.0" + xmlbuilder "~11.0.0" + +xml@^1.0.1: + version "1.0.1" + resolved "https://registry.yarnpkg.com/xml/-/xml-1.0.1.tgz#78ba72020029c5bc87b8a81a3cfcd74b4a2fc1e5" + integrity sha512-huCv9IH9Tcf95zuYCsQraZtWnJvBtLVE0QHMOs8bWyZAFZNDcYjsPq1nEx8jKA9y+Beo9v+7OBPRisQTjinQMw== + +xmlbuilder@^14.0.0: + version "14.0.0" + resolved "https://registry.yarnpkg.com/xmlbuilder/-/xmlbuilder-14.0.0.tgz#876b5aec4f05ffd5feb97b0a871c855d16fbeb8c" + integrity sha512-ts+B2rSe4fIckR6iquDjsKbQFK2NlUk6iG5nf14mDEyldgoc2nEKZ3jZWMPTxGQwVgToSjt6VGIho1H8/fNFTg== + +xmlbuilder@^15.1.1: + version "15.1.1" + resolved "https://registry.yarnpkg.com/xmlbuilder/-/xmlbuilder-15.1.1.tgz#9dcdce49eea66d8d10b42cae94a79c3c8d0c2ec5" + integrity sha512-yMqGBqtXyeN1e3TGYvgNgDVZ3j84W4cwkOXQswghol6APgZWaff9lnbvN7MHYJOiXsvGPXtjTYJEiC9J2wv9Eg== + +xmlbuilder@~11.0.0: + version "11.0.1" + resolved "https://registry.yarnpkg.com/xmlbuilder/-/xmlbuilder-11.0.1.tgz#be9bae1c8a046e76b31127726347d0ad7002beb3" + integrity sha512-fDlsI/kFEx7gLvbecc0/ohLG50fugQp8ryHzMTuW9vSa1GJ0XYWKnhsUx7oie3G98+r56aTQIUB4kht42R3JvA== xtend@~4.0.1: version "4.0.2" diff --git a/js/react_native/ios/OnnxruntimeModule.mm b/js/react_native/ios/OnnxruntimeModule.mm index 16e64d8ed98b4..d3527aad6ae38 100644 --- a/js/react_native/ios/OnnxruntimeModule.mm +++ b/js/react_native/ios/OnnxruntimeModule.mm @@ -73,11 +73,7 @@ - (void)setBlobManager:(RCTBlobManager*)manager { * @param reject callback for returning an error back to react native js * @note when run() is called, the same modelPath must be passed into the first parameter. */ -RCT_EXPORT_METHOD(loadModel - : (NSString*)modelPath options - : (NSDictionary*)options resolver - : (RCTPromiseResolveBlock)resolve rejecter - : (RCTPromiseRejectBlock)reject) { +RCT_EXPORT_METHOD(loadModel : (NSString*)modelPath options : (NSDictionary*)options resolver : (RCTPromiseResolveBlock)resolve rejecter : (RCTPromiseRejectBlock)reject) { @try { NSDictionary* resultMap = [self loadModel:modelPath options:options]; resolve(resultMap); @@ -95,11 +91,7 @@ - (void)setBlobManager:(RCTBlobManager*)manager { * @param reject callback for returning an error back to react native js * @note when run() is called, the same modelPath must be passed into the first parameter. 
*/ -RCT_EXPORT_METHOD(loadModelFromBlob - : (NSDictionary*)modelDataBlob options - : (NSDictionary*)options resolver - : (RCTPromiseResolveBlock)resolve rejecter - : (RCTPromiseRejectBlock)reject) { +RCT_EXPORT_METHOD(loadModelFromBlob : (NSDictionary*)modelDataBlob options : (NSDictionary*)options resolver : (RCTPromiseResolveBlock)resolve rejecter : (RCTPromiseRejectBlock)reject) { @try { [self checkBlobManager]; NSString* blobId = [modelDataBlob objectForKey:@"blobId"]; @@ -121,10 +113,7 @@ - (void)setBlobManager:(RCTBlobManager*)manager { * @param resolve callback for returning output back to react native js * @param reject callback for returning an error back to react native js */ -RCT_EXPORT_METHOD(dispose - : (NSString*)key resolver - : (RCTPromiseResolveBlock)resolve rejecter - : (RCTPromiseRejectBlock)reject) { +RCT_EXPORT_METHOD(dispose : (NSString*)key resolver : (RCTPromiseResolveBlock)resolve rejecter : (RCTPromiseRejectBlock)reject) { @try { [self dispose:key]; resolve(nil); @@ -143,13 +132,7 @@ - (void)setBlobManager:(RCTBlobManager*)manager { * @param resolve callback for returning an inference result back to react native js * @param reject callback for returning an error back to react native js */ -RCT_EXPORT_METHOD(run - : (NSString*)url input - : (NSDictionary*)input output - : (NSArray*)output options - : (NSDictionary*)options resolver - : (RCTPromiseResolveBlock)resolve rejecter - : (RCTPromiseRejectBlock)reject) { +RCT_EXPORT_METHOD(run : (NSString*)url input : (NSDictionary*)input output : (NSArray*)output options : (NSDictionary*)options resolver : (RCTPromiseResolveBlock)resolve rejecter : (RCTPromiseRejectBlock)reject) { @try { NSDictionary* resultMap = [self run:url input:input output:output options:options]; resolve(resultMap); diff --git a/js/react_native/ios/OnnxruntimeModule.xcodeproj/project.pbxproj b/js/react_native/ios/OnnxruntimeModule.xcodeproj/project.pbxproj index 835259706939f..b5984872a0310 100644 --- a/js/react_native/ios/OnnxruntimeModule.xcodeproj/project.pbxproj +++ b/js/react_native/ios/OnnxruntimeModule.xcodeproj/project.pbxproj @@ -7,15 +7,16 @@ objects = { /* Begin PBXBuildFile section */ - 0105483CF04B9471894F3EAA /* Pods_OnnxruntimeModuleTest.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 38EB61A518C2DF782F7CD433 /* Pods_OnnxruntimeModuleTest.framework */; }; + 2507023E063B593E8767184B /* Pods_OnnxruntimeModuleTest.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 380A4E10493D3005E7695737 /* Pods_OnnxruntimeModuleTest.framework */; }; 7FD234672A1F221700734B71 /* FakeRCTBlobManager.m in Sources */ = {isa = PBXBuildFile; fileRef = 7FD234662A1F221700734B71 /* FakeRCTBlobManager.m */; }; - C60033360456900E26D6F96F /* Pods_OnnxruntimeModule.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 49D0ADD02E7162A5F0DE8BAB /* Pods_OnnxruntimeModule.framework */; }; DB8FC9B525C2867800C72F26 /* OnnxruntimeModule.mm in Sources */ = {isa = PBXBuildFile; fileRef = DB8FC9B425C2867800C72F26 /* OnnxruntimeModule.mm */; }; DB8FC9B825C2868700C72F26 /* TensorHelper.mm in Sources */ = {isa = PBXBuildFile; fileRef = DB8FC9B725C2868700C72F26 /* TensorHelper.mm */; }; DBDB57DA2603211A004F16BE /* TensorHelperTest.mm in Sources */ = {isa = PBXBuildFile; fileRef = DBDB57D92603211A004F16BE /* TensorHelperTest.mm */; }; DBDB57DC2603211A004F16BE /* libOnnxruntimeModule.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 134814201AA4EA6300B7C361 /* libOnnxruntimeModule.a */; }; DBDB588B2609B18F004F16BE /* Resources in Resources */ = {isa = 
PBXBuildFile; fileRef = DBDB588A2609B18F004F16BE /* Resources */; }; DBDB58B0262A92D7004F16BE /* OnnxruntimeModuleTest.mm in Sources */ = {isa = PBXBuildFile; fileRef = DBDB58AF262A92D6004F16BE /* OnnxruntimeModuleTest.mm */; }; + E329E1182D372C780016B599 /* PrivacyInfo.xcprivacy in Resources */ = {isa = PBXBuildFile; fileRef = E329E1172D372C780016B599 /* PrivacyInfo.xcprivacy */; }; + F58B845092748409D2B634B9 /* Pods_OnnxruntimeModule.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 7FFA29EB31D0567D9122F532 /* Pods_OnnxruntimeModule.framework */; }; /* End PBXBuildFile section */ /* Begin PBXContainerItemProxy section */ @@ -42,13 +43,13 @@ /* Begin PBXFileReference section */ 134814201AA4EA6300B7C361 /* libOnnxruntimeModule.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = libOnnxruntimeModule.a; sourceTree = BUILT_PRODUCTS_DIR; }; - 38EB61A518C2DF782F7CD433 /* Pods_OnnxruntimeModuleTest.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; includeInIndex = 0; path = Pods_OnnxruntimeModuleTest.framework; sourceTree = BUILT_PRODUCTS_DIR; }; - 49D0ADD02E7162A5F0DE8BAB /* Pods_OnnxruntimeModule.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; includeInIndex = 0; path = Pods_OnnxruntimeModule.framework; sourceTree = BUILT_PRODUCTS_DIR; }; + 380A4E10493D3005E7695737 /* Pods_OnnxruntimeModuleTest.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; includeInIndex = 0; path = Pods_OnnxruntimeModuleTest.framework; sourceTree = BUILT_PRODUCTS_DIR; }; 5391B4C0B7C168594AA0DD0B /* Pods-OnnxruntimeModuleTest.debug.xcconfig */ = {isa = PBXFileReference; includeInIndex = 1; lastKnownFileType = text.xcconfig; name = "Pods-OnnxruntimeModuleTest.debug.xcconfig"; path = "Target Support Files/Pods-OnnxruntimeModuleTest/Pods-OnnxruntimeModuleTest.debug.xcconfig"; sourceTree = ""; }; 548638FE75FCC69C842C9545 /* Pods-OnnxruntimeModule.release.xcconfig */ = {isa = PBXFileReference; includeInIndex = 1; lastKnownFileType = text.xcconfig; name = "Pods-OnnxruntimeModule.release.xcconfig"; path = "Target Support Files/Pods-OnnxruntimeModule/Pods-OnnxruntimeModule.release.xcconfig"; sourceTree = ""; }; 63B05EB079B0A4D99448F1D3 /* Pods-OnnxruntimeModule.debug.xcconfig */ = {isa = PBXFileReference; includeInIndex = 1; lastKnownFileType = text.xcconfig; name = "Pods-OnnxruntimeModule.debug.xcconfig"; path = "Target Support Files/Pods-OnnxruntimeModule/Pods-OnnxruntimeModule.debug.xcconfig"; sourceTree = ""; }; 7FD234662A1F221700734B71 /* FakeRCTBlobManager.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = FakeRCTBlobManager.m; sourceTree = ""; }; 7FD234682A1F234500734B71 /* FakeRCTBlobManager.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = FakeRCTBlobManager.h; sourceTree = ""; }; + 7FFA29EB31D0567D9122F532 /* Pods_OnnxruntimeModule.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; includeInIndex = 0; path = Pods_OnnxruntimeModule.framework; sourceTree = BUILT_PRODUCTS_DIR; }; 8529D8A6F40E462E62B38B52 /* Pods-OnnxruntimeModuleTest.release.xcconfig */ = {isa = PBXFileReference; includeInIndex = 1; lastKnownFileType = text.xcconfig; name = "Pods-OnnxruntimeModuleTest.release.xcconfig"; path = "Target Support Files/Pods-OnnxruntimeModuleTest/Pods-OnnxruntimeModuleTest.release.xcconfig"; sourceTree = ""; }; DB8FC9B425C2867800C72F26 /* OnnxruntimeModule.mm */ = {isa = PBXFileReference; fileEncoding = 4; 
lastKnownFileType = sourcecode.cpp.objcpp; path = OnnxruntimeModule.mm; sourceTree = SOURCE_ROOT; }; DB8FC9B725C2868700C72F26 /* TensorHelper.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = TensorHelper.mm; sourceTree = SOURCE_ROOT; }; @@ -57,6 +58,7 @@ DBDB57DB2603211A004F16BE /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = ""; }; DBDB588A2609B18F004F16BE /* Resources */ = {isa = PBXFileReference; lastKnownFileType = folder; name = Resources; path = OnnxruntimeModuleTest/Resources; sourceTree = ""; }; DBDB58AF262A92D6004F16BE /* OnnxruntimeModuleTest.mm */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.objcpp; path = OnnxruntimeModuleTest.mm; sourceTree = ""; }; + E329E1172D372C780016B599 /* PrivacyInfo.xcprivacy */ = {isa = PBXFileReference; lastKnownFileType = text.xml; path = PrivacyInfo.xcprivacy; sourceTree = ""; }; /* End PBXFileReference section */ /* Begin PBXFrameworksBuildPhase section */ @@ -64,7 +66,7 @@ isa = PBXFrameworksBuildPhase; buildActionMask = 2147483647; files = ( - C60033360456900E26D6F96F /* Pods_OnnxruntimeModule.framework in Frameworks */, + F58B845092748409D2B634B9 /* Pods_OnnxruntimeModule.framework in Frameworks */, ); runOnlyForDeploymentPostprocessing = 0; }; @@ -73,7 +75,7 @@ buildActionMask = 2147483647; files = ( DBDB57DC2603211A004F16BE /* libOnnxruntimeModule.a in Frameworks */, - 0105483CF04B9471894F3EAA /* Pods_OnnxruntimeModuleTest.framework in Frameworks */, + 2507023E063B593E8767184B /* Pods_OnnxruntimeModuleTest.framework in Frameworks */, ); runOnlyForDeploymentPostprocessing = 0; }; @@ -91,6 +93,7 @@ 58B511D21A9E6C8500147676 = { isa = PBXGroup; children = ( + E329E1172D372C780016B599 /* PrivacyInfo.xcprivacy */, DBDB588A2609B18F004F16BE /* Resources */, DB8FC9B325C2861300C72F26 /* OnnxruntimeModule */, DBDB57D82603211A004F16BE /* OnnxruntimeModuleTest */, @@ -115,8 +118,8 @@ 6FFDF1594C99DA125B013E34 /* Frameworks */ = { isa = PBXGroup; children = ( - 49D0ADD02E7162A5F0DE8BAB /* Pods_OnnxruntimeModule.framework */, - 38EB61A518C2DF782F7CD433 /* Pods_OnnxruntimeModuleTest.framework */, + 7FFA29EB31D0567D9122F532 /* Pods_OnnxruntimeModule.framework */, + 380A4E10493D3005E7695737 /* Pods_OnnxruntimeModuleTest.framework */, ); name = Frameworks; sourceTree = ""; @@ -171,7 +174,7 @@ DBDB57D32603211A004F16BE /* Sources */, DBDB57D42603211A004F16BE /* Frameworks */, DBDB57D52603211A004F16BE /* Resources */, - 015C75E59BC80D4507FB6E8A /* [CP] Embed Pods Frameworks */, + 8FE621EF8E674693B253B8F6 /* [CP] Embed Pods Frameworks */, ); buildRules = ( ); @@ -227,6 +230,7 @@ isa = PBXResourcesBuildPhase; buildActionMask = 2147483647; files = ( + E329E1182D372C780016B599 /* PrivacyInfo.xcprivacy in Resources */, DBDB588B2609B18F004F16BE /* Resources in Resources */, ); runOnlyForDeploymentPostprocessing = 0; @@ -234,7 +238,29 @@ /* End PBXResourcesBuildPhase section */ /* Begin PBXShellScriptBuildPhase section */ - 015C75E59BC80D4507FB6E8A /* [CP] Embed Pods Frameworks */ = { + 896E89AEC864CBD0CC7E0AF1 /* [CP] Check Pods Manifest.lock */ = { + isa = PBXShellScriptBuildPhase; + buildActionMask = 2147483647; + files = ( + ); + inputFileListPaths = ( + ); + inputPaths = ( + "${PODS_PODFILE_DIR_PATH}/Podfile.lock", + "${PODS_ROOT}/Manifest.lock", + ); + name = "[CP] Check Pods Manifest.lock"; + outputFileListPaths = ( + ); + outputPaths = ( + "$(DERIVED_FILE_DIR)/Pods-OnnxruntimeModuleTest-checkManifestLockResult.txt", + ); + 
runOnlyForDeploymentPostprocessing = 0; + shellPath = /bin/sh; + shellScript = "diff \"${PODS_PODFILE_DIR_PATH}/Podfile.lock\" \"${PODS_ROOT}/Manifest.lock\" > /dev/null\nif [ $? != 0 ] ; then\n # print error to STDERR\n echo \"error: The sandbox is not in sync with the Podfile.lock. Run 'pod install' or update your CocoaPods installation.\" >&2\n exit 1\nfi\n# This output is used by Xcode 'outputs' to avoid re-running this script phase.\necho \"SUCCESS\" > \"${SCRIPT_OUTPUT_FILE_0}\"\n"; + showEnvVarsInLog = 0; + }; + 8FE621EF8E674693B253B8F6 /* [CP] Embed Pods Frameworks */ = { isa = PBXShellScriptBuildPhase; buildActionMask = 2147483647; files = ( @@ -248,6 +274,7 @@ "${BUILT_PRODUCTS_DIR}/React-Core/React.framework", "${BUILT_PRODUCTS_DIR}/React-CoreModules/CoreModules.framework", "${BUILT_PRODUCTS_DIR}/React-RCTAnimation/RCTAnimation.framework", + "${BUILT_PRODUCTS_DIR}/React-RCTAppDelegate/React_RCTAppDelegate.framework", "${BUILT_PRODUCTS_DIR}/React-RCTBlob/RCTBlob.framework", "${BUILT_PRODUCTS_DIR}/React-RCTImage/RCTImage.framework", "${BUILT_PRODUCTS_DIR}/React-RCTLinking/RCTLinking.framework", @@ -255,8 +282,8 @@ "${BUILT_PRODUCTS_DIR}/React-RCTSettings/RCTSettings.framework", "${BUILT_PRODUCTS_DIR}/React-RCTText/RCTText.framework", "${BUILT_PRODUCTS_DIR}/React-RCTVibration/RCTVibration.framework", - "${BUILT_PRODUCTS_DIR}/React-bridging/react_bridging.framework", "${BUILT_PRODUCTS_DIR}/React-cxxreact/cxxreact.framework", + "${BUILT_PRODUCTS_DIR}/React-jsc/React_jsc.framework", "${BUILT_PRODUCTS_DIR}/React-jsi/jsi.framework", "${BUILT_PRODUCTS_DIR}/React-jsiexecutor/jsireact.framework", "${BUILT_PRODUCTS_DIR}/React-jsinspector/jsinspector.framework", @@ -276,6 +303,7 @@ "${TARGET_BUILD_DIR}/${FRAMEWORKS_FOLDER_PATH}/React.framework", "${TARGET_BUILD_DIR}/${FRAMEWORKS_FOLDER_PATH}/CoreModules.framework", "${TARGET_BUILD_DIR}/${FRAMEWORKS_FOLDER_PATH}/RCTAnimation.framework", + "${TARGET_BUILD_DIR}/${FRAMEWORKS_FOLDER_PATH}/React_RCTAppDelegate.framework", "${TARGET_BUILD_DIR}/${FRAMEWORKS_FOLDER_PATH}/RCTBlob.framework", "${TARGET_BUILD_DIR}/${FRAMEWORKS_FOLDER_PATH}/RCTImage.framework", "${TARGET_BUILD_DIR}/${FRAMEWORKS_FOLDER_PATH}/RCTLinking.framework", @@ -283,8 +311,8 @@ "${TARGET_BUILD_DIR}/${FRAMEWORKS_FOLDER_PATH}/RCTSettings.framework", "${TARGET_BUILD_DIR}/${FRAMEWORKS_FOLDER_PATH}/RCTText.framework", "${TARGET_BUILD_DIR}/${FRAMEWORKS_FOLDER_PATH}/RCTVibration.framework", - "${TARGET_BUILD_DIR}/${FRAMEWORKS_FOLDER_PATH}/react_bridging.framework", "${TARGET_BUILD_DIR}/${FRAMEWORKS_FOLDER_PATH}/cxxreact.framework", + "${TARGET_BUILD_DIR}/${FRAMEWORKS_FOLDER_PATH}/React_jsc.framework", "${TARGET_BUILD_DIR}/${FRAMEWORKS_FOLDER_PATH}/jsi.framework", "${TARGET_BUILD_DIR}/${FRAMEWORKS_FOLDER_PATH}/jsireact.framework", "${TARGET_BUILD_DIR}/${FRAMEWORKS_FOLDER_PATH}/jsinspector.framework", @@ -300,28 +328,6 @@ shellScript = "\"${PODS_ROOT}/Target Support Files/Pods-OnnxruntimeModuleTest/Pods-OnnxruntimeModuleTest-frameworks.sh\"\n"; showEnvVarsInLog = 0; }; - 896E89AEC864CBD0CC7E0AF1 /* [CP] Check Pods Manifest.lock */ = { - isa = PBXShellScriptBuildPhase; - buildActionMask = 2147483647; - files = ( - ); - inputFileListPaths = ( - ); - inputPaths = ( - "${PODS_PODFILE_DIR_PATH}/Podfile.lock", - "${PODS_ROOT}/Manifest.lock", - ); - name = "[CP] Check Pods Manifest.lock"; - outputFileListPaths = ( - ); - outputPaths = ( - "$(DERIVED_FILE_DIR)/Pods-OnnxruntimeModuleTest-checkManifestLockResult.txt", - ); - runOnlyForDeploymentPostprocessing = 0; - shellPath = /bin/sh; - shellScript 
= "diff \"${PODS_PODFILE_DIR_PATH}/Podfile.lock\" \"${PODS_ROOT}/Manifest.lock\" > /dev/null\nif [ $? != 0 ] ; then\n # print error to STDERR\n echo \"error: The sandbox is not in sync with the Podfile.lock. Run 'pod install' or update your CocoaPods installation.\" >&2\n exit 1\nfi\n# This output is used by Xcode 'outputs' to avoid re-running this script phase.\necho \"SUCCESS\" > \"${SCRIPT_OUTPUT_FILE_0}\"\n"; - showEnvVarsInLog = 0; - }; FA8BD7B76BD8BD02A6DB750A /* [CP] Check Pods Manifest.lock */ = { isa = PBXShellScriptBuildPhase; buildActionMask = 2147483647; @@ -381,7 +387,7 @@ isa = XCBuildConfiguration; buildSettings = { ALWAYS_SEARCH_USER_PATHS = NO; - CLANG_CXX_LANGUAGE_STANDARD = "gnu++0x"; + CLANG_CXX_LANGUAGE_STANDARD = "c++17"; CLANG_CXX_LIBRARY = "libc++"; CLANG_ENABLE_MODULES = YES; CLANG_ENABLE_OBJC_ARC = YES; @@ -405,6 +411,7 @@ COPY_PHASE_STRIP = NO; ENABLE_STRICT_OBJC_MSGSEND = YES; ENABLE_TESTABILITY = YES; + "EXCLUDED_ARCHS[sdk=iphonesimulator*]" = ""; GCC_C_LANGUAGE_STANDARD = gnu99; GCC_DYNAMIC_NO_PIC = NO; GCC_NO_COMMON_BLOCKS = YES; @@ -412,6 +419,7 @@ GCC_PREPROCESSOR_DEFINITIONS = ( "DEBUG=1", "$(inherited)", + _LIBCPP_ENABLE_CXX17_REMOVED_UNARY_BINARY_FUNCTION, ); GCC_SYMBOLS_PRIVATE_EXTERN = NO; GCC_WARN_64_TO_32_BIT_CONVERSION = YES; @@ -422,9 +430,11 @@ GCC_WARN_UNUSED_VARIABLE = YES; "HEADER_SEARCH_PATHS[arch=*]" = ""; IPHONEOS_DEPLOYMENT_TARGET = 15.1; - LIBRARY_SEARCH_PATHS = ""; + LIBRARY_SEARCH_PATHS = "$(SDKROOT)/usr/lib/swift"; MTL_ENABLE_DEBUG_INFO = YES; ONLY_ACTIVE_ARCH = YES; + OTHER_LDFLAGS = "$(inherited)"; + REACT_NATIVE_PATH = "${PODS_ROOT}/../../node_modules/react-native"; SDKROOT = iphoneos; }; name = Debug; @@ -433,7 +443,7 @@ isa = XCBuildConfiguration; buildSettings = { ALWAYS_SEARCH_USER_PATHS = NO; - CLANG_CXX_LANGUAGE_STANDARD = "gnu++0x"; + CLANG_CXX_LANGUAGE_STANDARD = "c++17"; CLANG_CXX_LIBRARY = "libc++"; CLANG_ENABLE_MODULES = YES; CLANG_ENABLE_OBJC_ARC = YES; @@ -457,8 +467,13 @@ COPY_PHASE_STRIP = YES; ENABLE_NS_ASSERTIONS = NO; ENABLE_STRICT_OBJC_MSGSEND = YES; + "EXCLUDED_ARCHS[sdk=iphonesimulator*]" = ""; GCC_C_LANGUAGE_STANDARD = gnu99; GCC_NO_COMMON_BLOCKS = YES; + GCC_PREPROCESSOR_DEFINITIONS = ( + "$(inherited)", + _LIBCPP_ENABLE_CXX17_REMOVED_UNARY_BINARY_FUNCTION, + ); GCC_WARN_64_TO_32_BIT_CONVERSION = YES; GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; GCC_WARN_UNDECLARED_SELECTOR = YES; @@ -467,8 +482,10 @@ GCC_WARN_UNUSED_VARIABLE = YES; "HEADER_SEARCH_PATHS[arch=*]" = ""; IPHONEOS_DEPLOYMENT_TARGET = 15.1; - LIBRARY_SEARCH_PATHS = ""; + LIBRARY_SEARCH_PATHS = "$(SDKROOT)/usr/lib/swift"; MTL_ENABLE_DEBUG_INFO = NO; + OTHER_LDFLAGS = "$(inherited)"; + REACT_NATIVE_PATH = "${PODS_ROOT}/../../node_modules/react-native"; SDKROOT = iphoneos; VALIDATE_PRODUCT = YES; }; @@ -487,6 +504,7 @@ "HEADER_SEARCH_PATHS[arch=*]" = "\"$(PODS_ROOT)/onnxruntime/onnxruntime.framework/Headers\""; IPHONEOS_DEPLOYMENT_TARGET = 15.1; LIBRARY_SEARCH_PATHS = ( + "$(SDKROOT)/usr/lib/swift", "$(inherited)", "$(PROJECT_DIR)", ); @@ -512,6 +530,7 @@ "HEADER_SEARCH_PATHS[arch=*]" = "\"$(PODS_ROOT)/onnxruntime/onnxruntime.framework/Headers\""; IPHONEOS_DEPLOYMENT_TARGET = 15.1; LIBRARY_SEARCH_PATHS = ( + "$(SDKROOT)/usr/lib/swift", "$(inherited)", "$(PROJECT_DIR)", ); @@ -586,8 +605,12 @@ ); INFOPLIST_FILE = OnnxruntimeModuleTest/Info.plist; IPHONEOS_DEPLOYMENT_TARGET = 15.1; - LD_RUNPATH_SEARCH_PATHS = "$(inherited) @executable_path/Frameworks @loader_path/Frameworks"; - LIBRARY_SEARCH_PATHS = "$(inherited)"; + LD_RUNPATH_SEARCH_PATHS = ( + 
"$(inherited)", + "@executable_path/Frameworks", + "@loader_path/Frameworks", + ); + LIBRARY_SEARCH_PATHS = "$(SDKROOT)/usr/lib/swift$(inherited)"; MTL_ENABLE_DEBUG_INFO = INCLUDE_SOURCE; MTL_FAST_MATH = YES; PRODUCT_BUNDLE_IDENTIFIER = ai.onnxruntime.reactnative.OnnxruntimeModuleTest; @@ -660,8 +683,12 @@ ); INFOPLIST_FILE = OnnxruntimeModuleTest/Info.plist; IPHONEOS_DEPLOYMENT_TARGET = 15.1; - LD_RUNPATH_SEARCH_PATHS = "$(inherited) @executable_path/Frameworks @loader_path/Frameworks"; - LIBRARY_SEARCH_PATHS = "$(inherited)"; + LD_RUNPATH_SEARCH_PATHS = ( + "$(inherited)", + "@executable_path/Frameworks", + "@loader_path/Frameworks", + ); + LIBRARY_SEARCH_PATHS = "$(SDKROOT)/usr/lib/swift$(inherited)"; MTL_FAST_MATH = YES; PRODUCT_BUNDLE_IDENTIFIER = ai.onnxruntime.reactnative.OnnxruntimeModuleTest; PRODUCT_NAME = "$(TARGET_NAME)"; diff --git a/js/react_native/ios/OnnxruntimeModuleTest/OnnxruntimeModuleTest.mm b/js/react_native/ios/OnnxruntimeModuleTest/OnnxruntimeModuleTest.mm index 7059177400f3c..326990a515510 100644 --- a/js/react_native/ios/OnnxruntimeModuleTest/OnnxruntimeModuleTest.mm +++ b/js/react_native/ios/OnnxruntimeModuleTest/OnnxruntimeModuleTest.mm @@ -144,7 +144,9 @@ - (void)testOnnxruntimeModule_AppendCoreml { XCTAssertEqualObjects(outputNames[0], @"output"); } - { [onnxruntimeModule dispose:sessionKey]; } + { + [onnxruntimeModule dispose:sessionKey]; + } } @end diff --git a/js/react_native/ios/Podfile b/js/react_native/ios/Podfile index b8ee6ac1a1695..9e9446576581c 100644 --- a/js/react_native/ios/Podfile +++ b/js/react_native/ios/Podfile @@ -3,22 +3,25 @@ require_relative '../node_modules/@react-native-community/cli-platform-ios/nativ platform :ios, '15.1' -pre_install do |installer| - # Custom pre-install script or commands - puts "Running pre-install script..." - - # Recommended fix for https://github.com/facebook/react-native/issues/32483 - # from https://github.com/facebook/react-native/issues/32483#issuecomment-966784501 - system("sed -i '' 's/typedef uint8_t clockid_t;//' \"./Pods/RCT-Folly/folly/portability/Time.h\"") -end +prepare_react_native_project! +# use_frameworks need to be loaded before use_react_native! for now +use_frameworks! def shared config = use_native_modules! - use_react_native!(:path => config["reactNativePath"]) + # Flags change depending on the env values. + flags = get_default_flags() - # Comment the next line if you don't want to use dynamic frameworks - use_frameworks! + use_react_native!( + :path => config[:reactNativePath], + # Hermes is now enabled by default. Disable by setting this flag to false. + # Upcoming versions of React Native may rely on get_default_flags(), but + # we make it explicit here to aid in the React Native upgrade process. + :hermes_enabled => false, + :fabric_enabled => false, + :app_path => "#{Pod::Config.instance.installation_root}/.." + ) ort_c_local_pod_path = ENV['ORT_C_LOCAL_POD_PATH'] if ort_c_local_pod_path != nil @@ -27,8 +30,7 @@ def shared else pod 'onnxruntime-c' end - - inherit! :search_paths + inherit! 
:complete end target 'OnnxruntimeModule' do @@ -40,11 +42,11 @@ target 'OnnxruntimeModuleTest' do end post_install do |installer| - installer.generated_projects.each do |project| - project.targets.each do |target| - target.build_configurations.each do |config| - config.build_settings['IPHONEOS_DEPLOYMENT_TARGET'] = '15.1' - end - end - end -end + react_native_post_install( + installer, + # Set `mac_catalyst_enabled` to `true` in order to apply patches + # necessary for Mac Catalyst builds + :mac_catalyst_enabled => false + ) + __apply_Xcode_12_5_M1_post_install_workaround(installer) +end \ No newline at end of file diff --git a/js/react_native/ios/PrivacyInfo.xcprivacy b/js/react_native/ios/PrivacyInfo.xcprivacy new file mode 100644 index 0000000000000..3639c19774648 --- /dev/null +++ b/js/react_native/ios/PrivacyInfo.xcprivacy @@ -0,0 +1,38 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd"> +<plist version="1.0"> +<dict> + <key>NSPrivacyCollectedDataTypes</key> + <array> + </array> + <key>NSPrivacyAccessedAPITypes</key> + <array> + <dict> + <key>NSPrivacyAccessedAPIType</key> + <string>NSPrivacyAccessedAPICategoryFileTimestamp</string> + <key>NSPrivacyAccessedAPITypeReasons</key> + <array> + <string>C617.1</string> + </array> + </dict> + <dict> + <key>NSPrivacyAccessedAPIType</key> + <string>NSPrivacyAccessedAPICategoryUserDefaults</string> + <key>NSPrivacyAccessedAPITypeReasons</key> + <array> + <string>CA92.1</string> + </array> + </dict> + <dict> + <key>NSPrivacyAccessedAPIType</key> + <string>NSPrivacyAccessedAPICategorySystemBootTime</string> + <key>NSPrivacyAccessedAPITypeReasons</key> + <array> + <string>35F9.1</string> + </array> + </dict> + </array> + <key>NSPrivacyTracking</key> + <false/> +</dict> +</plist> \ No newline at end of file diff --git a/js/react_native/package.json b/js/react_native/package.json index ff798530f59d3..29cf3a0a04df0 100644 --- a/js/react_native/package.json +++ b/js/react_native/package.json @@ -17,14 +17,14 @@ "ONNX Runtime" ], "devDependencies": { - "@types/jest": "^27.4.0", - "@types/react": "^18.0.9", + "@types/jest": "^29.2.1", + "@types/react": "^18.0.24", "@types/react-native": "^0.67.7", - "jest": "^27.4.7", + "jest": "^29.2.1", "pod-install": "^0.1.36", "prettier": "^2.6.2", - "react": "^18.1.0", - "react-native": "^0.69.7", + "react": "^18.2.0", + "react-native": "^0.71.19", "react-native-builder-bob": "^0.18.2" }, "peerDependencies": { diff --git a/js/react_native/yarn.lock b/js/react_native/yarn.lock index fd424f1f76089..9550e0221d2ca 100644 --- a/js/react_native/yarn.lock +++ b/js/react_native/yarn.lock @@ -1640,42 +1640,42 @@ "@nodelib/fs.scandir" "2.1.5" fastq "^1.6.0" -"@react-native-community/cli-clean@^8.0.4": - version "8.0.4" - resolved "https://registry.yarnpkg.com/@react-native-community/cli-clean/-/cli-clean-8.0.4.tgz#97e16a20e207b95de12e29b03816e8f2b2c80cc7" - integrity sha512-IwS1M1NHg6+qL8PThZYMSIMYbZ6Zbx+lIck9PLBskbosFo24M3lCOflOl++Bggjakp6mR+sRXxLMexid/GeOsQ== +"@react-native-community/cli-clean@^9.2.1": + version "9.2.1" + resolved "https://registry.yarnpkg.com/@react-native-community/cli-clean/-/cli-clean-9.2.1.tgz#198c5dd39c432efb5374582073065ff75d67d018" + integrity sha512-dyNWFrqRe31UEvNO+OFWmQ4hmqA07bR9Ief/6NnGwx67IO9q83D5PEAf/o96ML6jhSbDwCmpPKhPwwBbsyM3mQ== dependencies: - "@react-native-community/cli-tools" "^8.0.4" + "@react-native-community/cli-tools" "^9.2.1" chalk "^4.1.2" execa "^1.0.0" prompts "^2.4.0" -"@react-native-community/cli-config@^8.0.6": - version "8.0.6" - resolved "https://registry.yarnpkg.com/@react-native-community/cli-config/-/cli-config-8.0.6.tgz#041eee7dd8fdef595bf7a3f24228c173bf294a44" - integrity sha512-mjVpVvdh8AviiO8xtqeX+BkjqE//NMDnISwsLWSJUfNCwTAPmdR8PGbhgP5O4hWHyJ3WkepTopl0ya7Tfi3ifw== +"@react-native-community/cli-config@^9.2.1": + version "9.2.1" + resolved "https://registry.yarnpkg.com/@react-native-community/cli-config/-/cli-config-9.2.1.tgz#54eb026d53621ccf3a9df8b189ac24f6e56b8750" + integrity
sha512-gHJlBBXUgDN9vrr3aWkRqnYrPXZLztBDQoY97Mm5Yo6MidsEpYo2JIP6FH4N/N2p1TdjxJL4EFtdd/mBpiR2MQ== dependencies: - "@react-native-community/cli-tools" "^8.0.4" + "@react-native-community/cli-tools" "^9.2.1" cosmiconfig "^5.1.0" deepmerge "^3.2.0" glob "^7.1.3" joi "^17.2.1" -"@react-native-community/cli-debugger-ui@^8.0.0": - version "8.0.0" - resolved "https://registry.yarnpkg.com/@react-native-community/cli-debugger-ui/-/cli-debugger-ui-8.0.0.tgz#98263dc525e65015e2d6392c940114028f87e8e9" - integrity sha512-u2jq06GZwZ9sRERzd9FIgpW6yv4YOW4zz7Ym/B8eSzviLmy3yI/8mxJtvlGW+J8lBsfMcQoqJpqI6Rl1nZy9yQ== +"@react-native-community/cli-debugger-ui@^9.0.0": + version "9.0.0" + resolved "https://registry.yarnpkg.com/@react-native-community/cli-debugger-ui/-/cli-debugger-ui-9.0.0.tgz#ea5c5dad6008bccd840d858e160d42bb2ced8793" + integrity sha512-7hH05ZwU9Tp0yS6xJW0bqcZPVt0YCK7gwj7gnRu1jDNN2kughf6Lg0Ys29rAvtZ7VO1PK5c1O+zs7yFnylQDUA== dependencies: serve-static "^1.13.1" -"@react-native-community/cli-doctor@^8.0.6": - version "8.0.6" - resolved "https://registry.yarnpkg.com/@react-native-community/cli-doctor/-/cli-doctor-8.0.6.tgz#954250155ab2f3a66a54821e071bc4a631d2dfff" - integrity sha512-ZQqyT9mJMVeFEVIwj8rbDYGCA2xXjJfsQjWk2iTRZ1CFHfhPSUuUiG8r6mJmTinAP9t+wYcbbIYzNgdSUKnDMw== +"@react-native-community/cli-doctor@^9.3.0": + version "9.3.0" + resolved "https://registry.yarnpkg.com/@react-native-community/cli-doctor/-/cli-doctor-9.3.0.tgz#8817a3fd564453467def5b5bc8aecdc4205eff50" + integrity sha512-/fiuG2eDGC2/OrXMOWI5ifq4X1gdYTQhvW2m0TT5Lk1LuFiZsbTCp1lR+XILKekuTvmYNjEGdVpeDpdIWlXdEA== dependencies: - "@react-native-community/cli-config" "^8.0.6" - "@react-native-community/cli-platform-ios" "^8.0.6" - "@react-native-community/cli-tools" "^8.0.4" + "@react-native-community/cli-config" "^9.2.1" + "@react-native-community/cli-platform-ios" "^9.3.0" + "@react-native-community/cli-tools" "^9.2.1" chalk "^4.1.2" command-exists "^1.2.8" envinfo "^7.7.2" @@ -1690,69 +1690,64 @@ sudo-prompt "^9.0.0" wcwidth "^1.0.1" -"@react-native-community/cli-hermes@^8.0.5": - version "8.0.5" - resolved "https://registry.yarnpkg.com/@react-native-community/cli-hermes/-/cli-hermes-8.0.5.tgz#639edc6b0ce73f705e4b737e3de1cc47d42516ff" - integrity sha512-Zm0wM6SfgYAEX1kfJ1QBvTayabvh79GzmjHyuSnEROVNPbl4PeCG4WFbwy489tGwOP9Qx9fMT5tRIFCD8bp6/g== +"@react-native-community/cli-hermes@^9.3.4": + version "9.3.4" + resolved "https://registry.yarnpkg.com/@react-native-community/cli-hermes/-/cli-hermes-9.3.4.tgz#47851847c4990272687883bd8bf53733d5f3c341" + integrity sha512-VqTPA7kknCXgtYlRf+sDWW4yxZ6Gtg1Ga+Rdrn1qSKuo09iJ8YKPoQYOu5nqbIYJQAEhorWQyo1VvNgd0wd49w== dependencies: - "@react-native-community/cli-platform-android" "^8.0.5" - "@react-native-community/cli-tools" "^8.0.4" + "@react-native-community/cli-platform-android" "^9.3.4" + "@react-native-community/cli-tools" "^9.2.1" chalk "^4.1.2" hermes-profile-transformer "^0.0.6" ip "^1.1.5" -"@react-native-community/cli-platform-android@^8.0.4", "@react-native-community/cli-platform-android@^8.0.5": - version "8.0.5" - resolved "https://registry.yarnpkg.com/@react-native-community/cli-platform-android/-/cli-platform-android-8.0.5.tgz#da11d2678adeca98e83494d68de80e50571b4af4" - integrity sha512-z1YNE4T1lG5o9acoQR1GBvf7mq6Tzayqo/za5sHVSOJAC9SZOuVN/gg/nkBa9a8n5U7qOMFXfwhTMNqA474gXA== +"@react-native-community/cli-platform-android@9.3.4", "@react-native-community/cli-platform-android@^9.3.4": + version "9.3.4" + resolved 
"https://registry.yarnpkg.com/@react-native-community/cli-platform-android/-/cli-platform-android-9.3.4.tgz#42f22943b6ee15713add6af8608c1d0ebf79d774" + integrity sha512-BTKmTMYFuWtMqimFQJfhRyhIWw1m+5N5svR1S5+DqPcyFuSXrpNYDWNSFR8E105xUbFANmsCZZQh6n1WlwMpOA== dependencies: - "@react-native-community/cli-tools" "^8.0.4" + "@react-native-community/cli-tools" "^9.2.1" chalk "^4.1.2" execa "^1.0.0" fs-extra "^8.1.0" glob "^7.1.3" - jetifier "^1.6.2" - lodash "^4.17.15" logkitty "^0.7.1" slash "^3.0.0" -"@react-native-community/cli-platform-ios@^8.0.4", "@react-native-community/cli-platform-ios@^8.0.6": - version "8.0.6" - resolved "https://registry.yarnpkg.com/@react-native-community/cli-platform-ios/-/cli-platform-ios-8.0.6.tgz#ab80cd4eb3014b8fcfc9bd1b53ec0a9f8e5d1430" - integrity sha512-CMR6mu/LVx6JVfQRDL9uULsMirJT633bODn+IrYmrwSz250pnhON16We8eLPzxOZHyDjm7JPuSgHG3a/BPiRuQ== +"@react-native-community/cli-platform-ios@9.3.0", "@react-native-community/cli-platform-ios@^9.3.0": + version "9.3.0" + resolved "https://registry.yarnpkg.com/@react-native-community/cli-platform-ios/-/cli-platform-ios-9.3.0.tgz#45abde2a395fddd7cf71e8b746c1dc1ee2260f9a" + integrity sha512-nihTX53BhF2Q8p4B67oG3RGe1XwggoGBrMb6vXdcu2aN0WeXJOXdBLgR900DAA1O8g7oy1Sudu6we+JsVTKnjw== dependencies: - "@react-native-community/cli-tools" "^8.0.4" + "@react-native-community/cli-tools" "^9.2.1" chalk "^4.1.2" execa "^1.0.0" glob "^7.1.3" - js-yaml "^3.13.1" - lodash "^4.17.15" ora "^5.4.1" - plist "^3.0.2" -"@react-native-community/cli-plugin-metro@^8.0.4": - version "8.0.4" - resolved "https://registry.yarnpkg.com/@react-native-community/cli-plugin-metro/-/cli-plugin-metro-8.0.4.tgz#a364a50a2e05fc5d0b548759e499e5b681b6e4cc" - integrity sha512-UWzY1eMcEr/6262R2+d0Is5M3L/7Y/xXSDIFMoc5Rv5Wucl3hJM/TxHXmByvHpuJf6fJAfqOskyt4bZCvbI+wQ== +"@react-native-community/cli-plugin-metro@^9.3.3": + version "9.3.3" + resolved "https://registry.yarnpkg.com/@react-native-community/cli-plugin-metro/-/cli-plugin-metro-9.3.3.tgz#330d7b9476a3fdabdd5863f114fa962289e280dc" + integrity sha512-lPBw6XieNdj2AbWDN0Rc+jNOx8hBgSQyv0gUAm01qtJe4I9FjSMU6nOGTxMpWpICo6TYl/cmPGXOzbfpwxwtkQ== dependencies: - "@react-native-community/cli-server-api" "^8.0.4" - "@react-native-community/cli-tools" "^8.0.4" + "@react-native-community/cli-server-api" "^9.2.1" + "@react-native-community/cli-tools" "^9.2.1" chalk "^4.1.2" - metro "^0.70.1" - metro-config "^0.70.1" - metro-core "^0.70.1" - metro-react-native-babel-transformer "^0.70.1" - metro-resolver "^0.70.1" - metro-runtime "^0.70.1" + metro "0.72.4" + metro-config "0.72.4" + metro-core "0.72.4" + metro-react-native-babel-transformer "0.72.4" + metro-resolver "0.72.4" + metro-runtime "0.72.4" readline "^1.3.0" -"@react-native-community/cli-server-api@^8.0.4": - version "8.0.4" - resolved "https://registry.yarnpkg.com/@react-native-community/cli-server-api/-/cli-server-api-8.0.4.tgz#d45d895a0a6e8b960c9d677188d414a996faa4d3" - integrity sha512-Orr14njx1E70CVrUA8bFdl+mrnbuXUjf1Rhhm0RxUadFpvkHuOi5dh8Bryj2MKtf8eZrpEwZ7tuQPhJEULW16A== +"@react-native-community/cli-server-api@^9.2.1": + version "9.2.1" + resolved "https://registry.yarnpkg.com/@react-native-community/cli-server-api/-/cli-server-api-9.2.1.tgz#41ac5916b21d324bccef447f75600c03b2f54fbe" + integrity sha512-EI+9MUxEbWBQhWw2PkhejXfkcRqPl+58+whlXJvKHiiUd7oVbewFs0uLW0yZffUutt4FGx6Uh88JWEgwOzAdkw== dependencies: - "@react-native-community/cli-debugger-ui" "^8.0.0" - "@react-native-community/cli-tools" "^8.0.4" + "@react-native-community/cli-debugger-ui" "^9.0.0" + 
"@react-native-community/cli-tools" "^9.2.1" compression "^1.7.1" connect "^3.6.5" errorhandler "^1.5.0" @@ -1761,15 +1756,14 @@ serve-static "^1.13.1" ws "^7.5.1" -"@react-native-community/cli-tools@^8.0.4": - version "8.0.4" - resolved "https://registry.yarnpkg.com/@react-native-community/cli-tools/-/cli-tools-8.0.4.tgz#994b9d56c84472491c876b71acd4356773fcbe65" - integrity sha512-ePN9lGxh6LRFiotyddEkSmuqpQhnq2iw9oiXYr4EFWpIEy0yCigTuSTiDF68+c8M9B+7bTwkRpz/rMPC4ViO5Q== +"@react-native-community/cli-tools@^9.2.1": + version "9.2.1" + resolved "https://registry.yarnpkg.com/@react-native-community/cli-tools/-/cli-tools-9.2.1.tgz#c332324b1ea99f9efdc3643649bce968aa98191c" + integrity sha512-bHmL/wrKmBphz25eMtoJQgwwmeCylbPxqFJnFSbkqJPXQz3ManQ6q/gVVMqFyz7D3v+riaus/VXz3sEDa97uiQ== dependencies: appdirsjs "^1.2.4" chalk "^4.1.2" find-up "^5.0.0" - lodash "^4.17.15" mime "^2.4.1" node-fetch "^2.6.0" open "^6.2.0" @@ -1777,36 +1771,33 @@ semver "^6.3.0" shell-quote "^1.7.3" -"@react-native-community/cli-types@^8.0.0": - version "8.0.0" - resolved "https://registry.yarnpkg.com/@react-native-community/cli-types/-/cli-types-8.0.0.tgz#72d24178e5ed1c2d271da43e0a4a4f59178f261a" - integrity sha512-1lZS1PEvMlFaN3Se1ksyoFWzMjk+YfKi490GgsqKJln9gvFm8tqVPdnXttI5Uf2DQf3BMse8Bk8dNH4oV6Ewow== +"@react-native-community/cli-types@^9.1.0": + version "9.1.0" + resolved "https://registry.yarnpkg.com/@react-native-community/cli-types/-/cli-types-9.1.0.tgz#dcd6a0022f62790fe1f67417f4690db938746aab" + integrity sha512-KDybF9XHvafLEILsbiKwz5Iobd+gxRaPyn4zSaAerBxedug4er5VUWa8Szy+2GeYKZzMh/gsb1o9lCToUwdT/g== dependencies: joi "^17.2.1" -"@react-native-community/cli@^8.0.4": - version "8.0.6" - resolved "https://registry.yarnpkg.com/@react-native-community/cli/-/cli-8.0.6.tgz#7aae37843ab8e44b75c477c1de69f4c902e599ef" - integrity sha512-E36hU/if3quQCfJHGWVkpsCnwtByRCwORuAX0r6yr1ebKktpKeEO49zY9PAu/Z1gfyxCtgluXY0HfRxjKRFXTg== - dependencies: - "@react-native-community/cli-clean" "^8.0.4" - "@react-native-community/cli-config" "^8.0.6" - "@react-native-community/cli-debugger-ui" "^8.0.0" - "@react-native-community/cli-doctor" "^8.0.6" - "@react-native-community/cli-hermes" "^8.0.5" - "@react-native-community/cli-plugin-metro" "^8.0.4" - "@react-native-community/cli-server-api" "^8.0.4" - "@react-native-community/cli-tools" "^8.0.4" - "@react-native-community/cli-types" "^8.0.0" +"@react-native-community/cli@9.3.5": + version "9.3.5" + resolved "https://registry.yarnpkg.com/@react-native-community/cli/-/cli-9.3.5.tgz#73626d3be8f5e2e6389f2555d126666fb8de4389" + integrity sha512-X+/xSysHsb0rXUWZKtXnKGhUNMRPxYzyhBc3VMld+ygPaFG57TAdK9rFGRu7NkIsRI6qffF/SukQPVlBZIfBHg== + dependencies: + "@react-native-community/cli-clean" "^9.2.1" + "@react-native-community/cli-config" "^9.2.1" + "@react-native-community/cli-debugger-ui" "^9.0.0" + "@react-native-community/cli-doctor" "^9.3.0" + "@react-native-community/cli-hermes" "^9.3.4" + "@react-native-community/cli-plugin-metro" "^9.3.3" + "@react-native-community/cli-server-api" "^9.2.1" + "@react-native-community/cli-tools" "^9.2.1" + "@react-native-community/cli-types" "^9.1.0" chalk "^4.1.2" - commander "^2.19.0" + commander "^9.4.0" execa "^1.0.0" find-up "^4.1.0" fs-extra "^8.1.0" graceful-fs "^4.1.3" - leven "^3.1.0" - lodash "^4.17.15" - minimist "^1.2.0" prompts "^2.4.0" semver "^6.3.0" @@ -2136,45 +2127,20 @@ argparse@^1.0.7: dependencies: sprintf-js "~1.0.2" -arr-diff@^4.0.0: - version "4.0.0" - resolved 
"https://registry.yarnpkg.com/arr-diff/-/arr-diff-4.0.0.tgz#d6461074febfec71e7e15235761a329a5dc7c520" - integrity sha512-YVIQ82gZPGBebQV/a8dar4AitzCQs0jjXwMPZllpXMaGjXPYVUawSxQrRsjhjupyVxEvbHgUmIhKVlND+j02kA== - -arr-flatten@^1.1.0: - version "1.1.0" - resolved "https://registry.yarnpkg.com/arr-flatten/-/arr-flatten-1.1.0.tgz#36048bbff4e7b47e136644316c99669ea5ae91f1" - integrity sha512-L3hKV5R/p5o81R7O02IGnwpDmkp6E982XhtbuwSe3O4qOtMMMtodicASA1Cny2U+aCXcNpml+m4dPsvsJ3jatg== - -arr-union@^3.1.0: - version "3.1.0" - resolved "https://registry.yarnpkg.com/arr-union/-/arr-union-3.1.0.tgz#e39b09aea9def866a8f206e288af63919bae39c4" - integrity sha512-sKpyeERZ02v1FeCZT8lrfJq5u6goHCtpTAzPwJYe7c8SPFOboNjNg1vz2L4VTn9T4PQxEx13TbXLmYUcS6Ug7Q== - array-union@^2.1.0: version "2.1.0" resolved "https://registry.yarnpkg.com/array-union/-/array-union-2.1.0.tgz#b798420adbeb1de828d84acd8a2e23d3efe85e8d" integrity sha512-HGyxoOTYUyCM6stUe6EJgnd4EoewAI7zMdfqO+kGjnlZmBDz/cR5pf8r/cR4Wq60sL/p0IkcjUEEPwS3GFrIyw== -array-unique@^0.3.2: - version "0.3.2" - resolved "https://registry.yarnpkg.com/array-unique/-/array-unique-0.3.2.tgz#a894b75d4bc4f6cd679ef3244a9fd8f46ae2d428" - integrity sha512-SleRWjh9JUud2wH1hPs9rZBZ33H6T9HOiL0uwGnGx9FpE6wKGyfWugmbkEOIs6qWrZhg0LWeLziLrEwQJhs5mQ== - asap@~2.0.6: version "2.0.6" resolved "https://registry.yarnpkg.com/asap/-/asap-2.0.6.tgz#e50347611d7e690943208bbdafebcbc2fb866d46" integrity sha512-BSHWgDSAiKs50o2Re8ppvp3seVHXSRM44cdSsT9FfNEUUZLOGWVCsiWaRPWM1Znn+mqZ1OfVZ3z3DWEzSp7hRA== -assign-symbols@^1.0.0: - version "1.0.0" - resolved "https://registry.yarnpkg.com/assign-symbols/-/assign-symbols-1.0.0.tgz#59667f41fadd4f20ccbc2bb96b8d4f7f78ec0367" - integrity sha512-Q+JC7Whu8HhmTdBph/Tq59IoRtoy6KAm5zzPv00WdujX82lbAL8K7WVjne7vdCsAmbF4AYaDOPyO3k0kl8qIrw== - -ast-types@0.14.2: - version "0.14.2" - resolved "https://registry.yarnpkg.com/ast-types/-/ast-types-0.14.2.tgz#600b882df8583e3cd4f2df5fa20fa83759d4bdfd" - integrity sha512-O0yuUDnZeQDL+ncNGlJ78BiO4jnYI3bvMsD5prT0/nsgijG/LpNBIr63gTjVTNsiGkgQhiyCShTgxt8oXOrklA== +ast-types@0.15.2: + version "0.15.2" + resolved "https://registry.yarnpkg.com/ast-types/-/ast-types-0.15.2.tgz#39ae4809393c4b16df751ee563411423e85fb49d" + integrity sha512-c27loCv9QkZinsa5ProX751khO9DJl/AcB5c2KNtA6NRvHKS0PgLfcftz72KVq504vB0Gku5s2kUZzDBvQWvHg== dependencies: tslib "^2.0.1" @@ -2203,11 +2169,6 @@ at-least-node@^1.0.0: resolved "https://registry.yarnpkg.com/at-least-node/-/at-least-node-1.0.0.tgz#602cd4b46e844ad4effc92a8011a3c46e0238dc2" integrity sha512-+q/t7Ekv1EDY2l6Gda6LLiX14rU9TV20Wa3ofeQmwPFZbOMo9DXrLbOjFaaclkXKWidIaopwAObQDqwWtGUjqg== -atob@^2.1.2: - version "2.1.2" - resolved "https://registry.yarnpkg.com/atob/-/atob-2.1.2.tgz#6d9517eb9e030d2436666651e86bd9f6f13533c9" - integrity sha512-Wm6ukoaOGJi/73p/cl2GvLjTI5JM1k/O14isD73YML8StrH/7/lRFgmg8nICZgD3bZZvjwCGxtMOD3wWNAu8cg== - babel-core@^7.0.0-bridge.0: version "7.0.0-bridge.0" resolved "https://registry.yarnpkg.com/babel-core/-/babel-core-7.0.0-bridge.0.tgz#95a492ddd90f9b4e9a4a1da14eb335b87b634ece" @@ -2353,19 +2314,6 @@ base64-js@^1.1.2, base64-js@^1.2.3, base64-js@^1.3.1, base64-js@^1.5.1: resolved "https://registry.yarnpkg.com/base64-js/-/base64-js-1.5.1.tgz#1b1b440160a5bf7ad40b650f095963481903930a" integrity sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA== -base@^0.11.1: - version "0.11.2" - resolved "https://registry.yarnpkg.com/base/-/base-0.11.2.tgz#7bde5ced145b6d551a90db87f83c558b4eb48a8f" - integrity 
sha512-5T6P4xPgpp0YDFvSWwEZ4NoE3aM4QBQXDzmVbraCkFj8zHM+mba8SyqB5DbZWyR7mYHo6Y7BdQo3MoA4m0TeQg== - dependencies: - cache-base "^1.0.1" - class-utils "^0.3.5" - component-emitter "^1.2.1" - define-property "^1.0.0" - isobject "^3.0.1" - mixin-deep "^1.2.0" - pascalcase "^0.1.1" - big-integer@1.6.x: version "1.6.51" resolved "https://registry.yarnpkg.com/big-integer/-/big-integer-1.6.51.tgz#0df92a5d9880560d3ff2d5fd20245c889d130686" @@ -2402,22 +2350,6 @@ brace-expansion@^1.1.7: balanced-match "^1.0.0" concat-map "0.0.1" -braces@^2.3.1: - version "2.3.2" - resolved "https://registry.yarnpkg.com/braces/-/braces-2.3.2.tgz#5979fd3f14cd531565e5fa2df1abfff1dfaee729" - integrity sha512-aNdbnj9P8PjdXU4ybaWLK2IF3jc/EoDYbC7AazW6to3TRsfXxscC9UXOB5iDiEQrkyIbWp2SLQda4+QAa7nc3w== - dependencies: - arr-flatten "^1.1.0" - array-unique "^0.3.2" - extend-shallow "^2.0.1" - fill-range "^4.0.0" - isobject "^3.0.1" - repeat-element "^1.1.2" - snapdragon "^0.8.1" - snapdragon-node "^2.0.1" - split-string "^3.0.2" - to-regex "^3.0.1" - braces@^3.0.2: version "3.0.3" resolved "https://registry.yarnpkg.com/braces/-/braces-3.0.3.tgz#490332f40919452272d55a8480adc0c441358789" @@ -2474,21 +2406,6 @@ bytes@3.0.0: resolved "https://registry.yarnpkg.com/bytes/-/bytes-3.0.0.tgz#d32815404d689699f85a4ea4fa8755dd13a96048" integrity sha512-pMhOfFDPiv9t5jjIXkHosWmkSyQbvsgEVNkz0ERHbuLh2T/7j4Mqqpz523Fe8MVY89KC6Sh/QfS2sM+SjgFDcw== -cache-base@^1.0.1: - version "1.0.1" - resolved "https://registry.yarnpkg.com/cache-base/-/cache-base-1.0.1.tgz#0a7f46416831c8b662ee36fe4e7c59d76f666ab2" - integrity sha512-AKcdTnFSWATd5/GCPRxr2ChwIJ85CeyrEyjRHlKxQ56d4XJMGym0uAiKn0xbLOGOl3+yRpOTi484dVCEc5AUzQ== - dependencies: - collection-visit "^1.0.0" - component-emitter "^1.2.1" - get-value "^2.0.6" - has-value "^1.0.0" - isobject "^3.0.1" - set-value "^2.0.0" - to-object-path "^0.3.0" - union-value "^1.0.0" - unset-value "^1.0.0" - call-bind@^1.0.0: version "1.0.2" resolved "https://registry.yarnpkg.com/call-bind/-/call-bind-1.0.2.tgz#b1d4e89e688119c3c9a903ad30abb2f6a919be3c" @@ -2573,16 +2490,6 @@ cjs-module-lexer@^1.0.0: resolved "https://registry.yarnpkg.com/cjs-module-lexer/-/cjs-module-lexer-1.2.2.tgz#9f84ba3244a512f3a54e5277e8eef4c489864e40" integrity sha512-cOU9usZw8/dXIXKtwa8pM0OTJQuJkxMN6w30csNRUerHfeQ5R6U3kkU/FtJeIf3M202OHfY2U8ccInBG7/xogA== -class-utils@^0.3.5: - version "0.3.6" - resolved "https://registry.yarnpkg.com/class-utils/-/class-utils-0.3.6.tgz#f93369ae8b9a7ce02fd41faad0ca83033190c463" - integrity sha512-qOhPa/Fj7s6TY8H8esGu5QNpMMQxz79h+urzrNYN6mn+9BnxlDGf5QZ+XeCDsxSjPqsSR56XOZOJmpeurnLMeg== - dependencies: - arr-union "^3.1.0" - define-property "^0.2.5" - isobject "^3.0.0" - static-extend "^0.1.1" - clean-stack@^2.0.0: version "2.2.0" resolved "https://registry.yarnpkg.com/clean-stack/-/clean-stack-2.2.0.tgz#ee8472dbb129e727b31e8a10a427dee9dfe4008b" @@ -2642,14 +2549,6 @@ collect-v8-coverage@^1.0.0: resolved "https://registry.yarnpkg.com/collect-v8-coverage/-/collect-v8-coverage-1.0.1.tgz#cc2c8e94fc18bbdffe64d6534570c8a673b27f59" integrity sha512-iBPtljfCNcTKNAto0KEtDfZ3qzjJvqE3aTGZsbhjSBlorqpXJlaWWtPO35D+ZImoC3KWejX64o+yPGxhWSTzfg== -collection-visit@^1.0.0: - version "1.0.0" - resolved "https://registry.yarnpkg.com/collection-visit/-/collection-visit-1.0.0.tgz#4bc0373c164bc3291b4d368c829cf1a80a59dca0" - integrity sha1-S8A3PBZLwykbTTaMgpzxqApZ3KA= - dependencies: - map-visit "^1.0.0" - object-visit "^1.0.0" - color-convert@^1.9.0: version "1.9.3" resolved 
"https://registry.yarnpkg.com/color-convert/-/color-convert-1.9.3.tgz#bb71850690e1f136567de629d2d5471deda4c1e8" @@ -2691,10 +2590,10 @@ command-exists@^1.2.8: resolved "https://registry.yarnpkg.com/command-exists/-/command-exists-1.2.9.tgz#c50725af3808c8ab0260fd60b01fbfa25b954f69" integrity sha512-LTQ/SGc+s0Xc0Fu5WaKnR0YiygZkm9eKFvyS+fRsU7/ZWFF8ykFM6Pc9aCVf1+xasOOZpO3BAVgVrKvsqKHV7w== -commander@^2.19.0: - version "2.20.3" - resolved "https://registry.yarnpkg.com/commander/-/commander-2.20.3.tgz#fd485e84c03eb4881c20722ba48035e8531aeb33" - integrity sha512-GpVkmM8vF2vQUkj2LvZmD35JxeJOLCwJ9cUkugyk2nuhbv3+mJvpLYYt+0+USMxE+oj+ey/lJEnhZw75x/OMcQ== +commander@^9.4.0: + version "9.5.0" + resolved "https://registry.yarnpkg.com/commander/-/commander-9.5.0.tgz#bc08d1eb5cedf7ccb797a96199d41c7bc3e60d30" + integrity sha512-KRs7WVDKg86PWiuAqhDrAQnTXZKraVcCc6vFdL14qrZ/DcWwuRo7VoiYXalXO7S5GKpqYiVEwCbgFDfxNHKJBQ== commander@~2.13.0: version "2.13.0" @@ -2706,11 +2605,6 @@ commondir@^1.0.1: resolved "https://registry.yarnpkg.com/commondir/-/commondir-1.0.1.tgz#ddd800da0c66127393cca5950ea968a3aaf1253b" integrity sha1-3dgA2gxmEnOTzKWVDqloo6rxJTs= -component-emitter@^1.2.1: - version "1.3.0" - resolved "https://registry.yarnpkg.com/component-emitter/-/component-emitter-1.3.0.tgz#16e4070fba8ae29b679f2215853ee181ab2eabc0" - integrity sha512-Rd3se6QB+sO1TwqZjscQrurpEPIfO0/yYnSin6Q/rD3mOutHvUrCAhJub3r90uNb+SESBuE0QYoB90YdfatsRg== - compressible@~2.0.16: version "2.0.18" resolved "https://registry.yarnpkg.com/compressible/-/compressible-2.0.18.tgz#af53cca6b070d4c3c0750fbd77286a6d7cc46fba" @@ -2753,11 +2647,6 @@ convert-source-map@^1.4.0, convert-source-map@^1.6.0, convert-source-map@^1.7.0: dependencies: safe-buffer "~5.1.1" -copy-descriptor@^0.1.0: - version "0.1.1" - resolved "https://registry.yarnpkg.com/copy-descriptor/-/copy-descriptor-0.1.1.tgz#676f6eb3c39997c2ee1ac3a924fd6124748f578d" - integrity sha1-Z29us8OZl8LuGsOpJP1hJHSPV40= - core-js-compat@^3.21.0, core-js-compat@^3.22.1: version "3.22.7" resolved "https://registry.yarnpkg.com/core-js-compat/-/core-js-compat-3.22.7.tgz#8359eb66ecbf726dd0cfced8e48d5e73f3224239" @@ -2848,7 +2737,7 @@ dayjs@^1.8.15: resolved "https://registry.yarnpkg.com/dayjs/-/dayjs-1.11.2.tgz#fa0f5223ef0d6724b3d8327134890cfe3d72fbe5" integrity sha512-F4LXf1OeU9hrSYRPTTj/6FbO4HTjPKXvEIC1P2kcnFurViINCVk3ZV0xAS3XVx9MkMsXbbqlK6hjseaYbgKEHw== -debug@2.6.9, debug@^2.2.0, debug@^2.3.3: +debug@2.6.9, debug@^2.2.0: version "2.6.9" resolved "https://registry.yarnpkg.com/debug/-/debug-2.6.9.tgz#5d128515df134ff327e90a4c93f4e077a536341f" integrity sha512-bC7ElrdJaJnPbAP+1EotYvqZsb3ecl5wi6Bfi6BJTUcNowp6cvspg0jXznRTKDjm/E7AdgFBVeAPVMNcKGsHMA== @@ -2872,11 +2761,6 @@ decimal.js@^10.2.1: resolved "https://registry.yarnpkg.com/decimal.js/-/decimal.js-10.3.1.tgz#d8c3a444a9c6774ba60ca6ad7261c3a94fd5e783" integrity sha512-V0pfhfr8suzyPGOx3nmq4aHqabehUZn6Ch9kyFpV79TGDTWFmHqUqXdabR7QHqxzrYolF4+tVmJhUG4OURg5dQ== -decode-uri-component@^0.2.0: - version "0.2.2" - resolved "https://registry.yarnpkg.com/decode-uri-component/-/decode-uri-component-0.2.2.tgz#e69dbe25d37941171dd540e024c444cd5188e1e9" - integrity sha512-FqUYQ+8o158GyGTrMFJms9qh3CqTKvAqgqsTnkLI8sKu0028orqBhxNMFkFen0zGyg6epACD32pjVk58ngIErQ== - dedent@^0.7.0: version "0.7.0" resolved "https://registry.yarnpkg.com/dedent/-/dedent-0.7.0.tgz#2495ddbaf6eb874abb0e1be9df22d2e5a544326c" @@ -2912,28 +2796,6 @@ define-properties@^1.1.3: has-property-descriptors "^1.0.0" object-keys "^1.1.1" -define-property@^0.2.5: - version "0.2.5" - resolved 
"https://registry.yarnpkg.com/define-property/-/define-property-0.2.5.tgz#c35b1ef918ec3c990f9a5bc57be04aacec5c8116" - integrity sha1-w1se+RjsPJkPmlvFe+BKrOxcgRY= - dependencies: - is-descriptor "^0.1.0" - -define-property@^1.0.0: - version "1.0.0" - resolved "https://registry.yarnpkg.com/define-property/-/define-property-1.0.0.tgz#769ebaaf3f4a63aad3af9e8d304c9bbe79bfb0e6" - integrity sha1-dp66rz9KY6rTr56NMEybvnm/sOY= - dependencies: - is-descriptor "^1.0.0" - -define-property@^2.0.2: - version "2.0.2" - resolved "https://registry.yarnpkg.com/define-property/-/define-property-2.0.2.tgz#d459689e8d654ba77e02a817f8710d702cb16e9d" - integrity sha512-jwK2UV4cnPpbcG7+VRARKTZPUWowwXA8bzH5NP6ud0oeAxyYPuGZUAC7hMugpCdz4BeSZl2Dl9k66CHJ/46ZYQ== - dependencies: - is-descriptor "^1.0.2" - isobject "^3.0.1" - del@^6.0.0: version "6.1.1" resolved "https://registry.yarnpkg.com/del/-/del-6.1.1.tgz#3b70314f1ec0aa325c6b14eb36b95786671edb7a" @@ -3156,19 +3018,6 @@ exit@^0.1.2: resolved "https://registry.yarnpkg.com/exit/-/exit-0.1.2.tgz#0632638f8d877cc82107d30a0fff1a17cba1cd0c" integrity sha1-BjJjj42HfMghB9MKD/8aF8uhzQw= -expand-brackets@^2.1.4: - version "2.1.4" - resolved "https://registry.yarnpkg.com/expand-brackets/-/expand-brackets-2.1.4.tgz#b77735e315ce30f6b6eff0f83b04151a22449622" - integrity sha1-t3c14xXOMPa27/D4OwQVGiJEliI= - dependencies: - debug "^2.3.3" - define-property "^0.2.5" - extend-shallow "^2.0.1" - posix-character-classes "^0.1.0" - regex-not "^1.0.0" - snapdragon "^0.8.1" - to-regex "^3.0.1" - expect@^27.5.1: version "27.5.1" resolved "https://registry.yarnpkg.com/expect/-/expect-27.5.1.tgz#83ce59f1e5bdf5f9d2b94b61d2050db48f3fef74" @@ -3179,35 +3028,6 @@ expect@^27.5.1: jest-matcher-utils "^27.5.1" jest-message-util "^27.5.1" -extend-shallow@^2.0.1: - version "2.0.1" - resolved "https://registry.yarnpkg.com/extend-shallow/-/extend-shallow-2.0.1.tgz#51af7d614ad9a9f610ea1bafbb989d6b1c56890f" - integrity sha1-Ua99YUrZqfYQ6huvu5idaxxWiQ8= - dependencies: - is-extendable "^0.1.0" - -extend-shallow@^3.0.0, extend-shallow@^3.0.2: - version "3.0.2" - resolved "https://registry.yarnpkg.com/extend-shallow/-/extend-shallow-3.0.2.tgz#26a71aaf073b39fb2127172746131c2704028db8" - integrity sha1-Jqcarwc7OfshJxcnRhMcJwQCjbg= - dependencies: - assign-symbols "^1.0.0" - is-extendable "^1.0.1" - -extglob@^2.0.4: - version "2.0.4" - resolved "https://registry.yarnpkg.com/extglob/-/extglob-2.0.4.tgz#ad00fe4dc612a9232e8718711dc5cb5ab0285543" - integrity sha512-Nmb6QXkELsuBr24CJSkilo6UHHgbekK5UiZgfE6UHD3Eb27YC6oD+bhcT+tJ6cl8dmsgdQxnWlcry8ksBIBLpw== - dependencies: - array-unique "^0.3.2" - define-property "^1.0.0" - expand-brackets "^2.1.4" - extend-shallow "^2.0.1" - fragment-cache "^0.2.1" - regex-not "^1.0.0" - snapdragon "^0.8.1" - to-regex "^3.0.1" - fast-glob@^3.2.9: version "3.2.11" resolved "https://registry.yarnpkg.com/fast-glob/-/fast-glob-3.2.11.tgz#a1172ad95ceb8a16e20caa5c5e56480e5129c1d9" @@ -3243,16 +3063,6 @@ fb-watchman@^2.0.0: dependencies: bser "2.1.1" -fill-range@^4.0.0: - version "4.0.0" - resolved "https://registry.yarnpkg.com/fill-range/-/fill-range-4.0.0.tgz#d544811d428f98eb06a63dc402d2403c328c38f7" - integrity sha1-1USBHUKPmOsGpj3EAtJAPDKMOPc= - dependencies: - extend-shallow "^2.0.1" - is-number "^3.0.0" - repeat-string "^1.6.1" - to-regex-range "^2.1.0" - fill-range@^7.1.1: version "7.1.1" resolved "https://registry.yarnpkg.com/fill-range/-/fill-range-7.1.1.tgz#44265d3cac07e3ea7dc247516380643754a05292" @@ -3315,11 +3125,6 @@ flow-parser@^0.121.0: resolved 
"https://registry.yarnpkg.com/flow-parser/-/flow-parser-0.121.0.tgz#9f9898eaec91a9f7c323e9e992d81ab5c58e618f" integrity sha512-1gIBiWJNR0tKUNv8gZuk7l9rVX06OuLzY9AoGio7y/JT4V1IZErEMEq2TJS+PFcw/y0RshZ1J/27VfK1UQzYVg== -for-in@^1.0.2: - version "1.0.2" - resolved "https://registry.yarnpkg.com/for-in/-/for-in-1.0.2.tgz#81068d295a8142ec0ac726c6e2200c30fb6d5e80" - integrity sha1-gQaNKVqBQuwKxybG4iAMMPttXoA= - form-data@^3.0.0: version "3.0.1" resolved "https://registry.yarnpkg.com/form-data/-/form-data-3.0.1.tgz#ebd53791b78356a99af9a300d4282c4d5eb9755f" @@ -3329,13 +3134,6 @@ form-data@^3.0.0: combined-stream "^1.0.8" mime-types "^2.1.12" -fragment-cache@^0.2.1: - version "0.2.1" - resolved "https://registry.yarnpkg.com/fragment-cache/-/fragment-cache-0.2.1.tgz#4290fad27f13e89be7f33799c6bc5a0abfff0d19" - integrity sha1-QpD60n8T6Jvn8zeZxrxaCr//DRk= - dependencies: - map-cache "^0.2.2" - fresh@0.5.2: version "0.5.2" resolved "https://registry.yarnpkg.com/fresh/-/fresh-0.5.2.tgz#3d8cadd90d976569fa835ab1f8e4b23a105605a7" @@ -3374,6 +3172,11 @@ fs.realpath@^1.0.0: resolved "https://registry.yarnpkg.com/fs.realpath/-/fs.realpath-1.0.0.tgz#1504ad2523158caa40db4a2787cb01411994ea4f" integrity sha512-OO0pH2lK6a0hZnAdau5ItzHPI6pUlvI7jMVnxUQRtw4owF2wk8lOSabtGDCTP4Ggrg2MbGnWO9X8K1t4+fGMDw== +fsevents@^2.1.2: + version "2.3.3" + resolved "https://registry.yarnpkg.com/fsevents/-/fsevents-2.3.3.tgz#cac6407785d03675a2a5e1a5305c697b347d90d6" + integrity sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw== + fsevents@^2.3.2: version "2.3.2" resolved "https://registry.yarnpkg.com/fsevents/-/fsevents-2.3.2.tgz#8a526f78b8fdf4623b709e0b975c52c24c02fd1a" @@ -3427,11 +3230,6 @@ get-stream@^6.0.0: resolved "https://registry.yarnpkg.com/get-stream/-/get-stream-6.0.1.tgz#a262d8eef67aced57c2852ad6167526a43cbf7b7" integrity sha512-ts6Wi+2j3jQjqi70w5AlN8DFnkSwC+MqmxEzdEALB2qXZYV3X/b1CTfgPLGJNMeAWxdPfU8FO1ms3NUfaHCPYg== -get-value@^2.0.3, get-value@^2.0.6: - version "2.0.6" - resolved "https://registry.yarnpkg.com/get-value/-/get-value-2.0.6.tgz#dc15ca1c672387ca76bd37ac0a395ba2042a2c28" - integrity sha1-3BXKHGcjh8p2vTesCjlbogQqLCg= - getenv@^1.0.0: version "1.0.0" resolved "https://registry.yarnpkg.com/getenv/-/getenv-1.0.0.tgz#874f2e7544fbca53c7a4738f37de8605c3fcfc31" @@ -3512,37 +3310,6 @@ has-symbols@^1.0.1: resolved "https://registry.yarnpkg.com/has-symbols/-/has-symbols-1.0.3.tgz#bb7b2c4349251dce87b125f7bdf874aa7c8b39f8" integrity sha512-l3LCuF6MgDNwTDKkdYGEihYjt5pRPbEg46rtlmnSPlUbgmB8LOIrKJbYYFBSbnPaJexMKtiPO8hmeRjRz2Td+A== -has-value@^0.3.1: - version "0.3.1" - resolved "https://registry.yarnpkg.com/has-value/-/has-value-0.3.1.tgz#7b1f58bada62ca827ec0a2078025654845995e1f" - integrity sha1-ex9YutpiyoJ+wKIHgCVlSEWZXh8= - dependencies: - get-value "^2.0.3" - has-values "^0.1.4" - isobject "^2.0.0" - -has-value@^1.0.0: - version "1.0.0" - resolved "https://registry.yarnpkg.com/has-value/-/has-value-1.0.0.tgz#18b281da585b1c5c51def24c930ed29a0be6b177" - integrity sha1-GLKB2lhbHFxR3vJMkw7SmgvmsXc= - dependencies: - get-value "^2.0.6" - has-values "^1.0.0" - isobject "^3.0.0" - -has-values@^0.1.4: - version "0.1.4" - resolved "https://registry.yarnpkg.com/has-values/-/has-values-0.1.4.tgz#6d61de95d91dfca9b9a02089ad384bff8f62b771" - integrity sha1-bWHeldkd/Km5oCCJrThL/49it3E= - -has-values@^1.0.0: - version "1.0.0" - resolved "https://registry.yarnpkg.com/has-values/-/has-values-1.0.0.tgz#95b0b63fec2146619a6fe57fe75628d5a39efe4f" - integrity sha1-lbC2P+whRmGab+V/51Yo1aOe/k8= - 
dependencies: - is-number "^3.0.0" - kind-of "^4.0.0" - has@^1.0.3: version "1.0.3" resolved "https://registry.yarnpkg.com/has/-/has-1.0.3.tgz#722d7cbfc1f6aa8241f16dd814e011e1f41e8796" @@ -3550,22 +3317,17 @@ has@^1.0.3: dependencies: function-bind "^1.1.1" -hermes-engine@~0.11.0: - version "0.11.0" - resolved "https://registry.yarnpkg.com/hermes-engine/-/hermes-engine-0.11.0.tgz#bb224730d230a02a5af02c4e090d1f52d57dd3db" - integrity sha512-7aMUlZja2IyLYAcZ69NBnwJAR5ZOYlSllj0oMpx08a8HzxHOys0eKCzfphrf6D0vX1JGO1QQvVsQKe6TkYherw== - -hermes-estree@0.6.0: - version "0.6.0" - resolved "https://registry.yarnpkg.com/hermes-estree/-/hermes-estree-0.6.0.tgz#e866fddae1b80aec65fe2ae450a5f2070ad54033" - integrity sha512-2YTGzJCkhdmT6VuNprWjXnvTvw/3iPNw804oc7yknvQpNKo+vJGZmtvLLCghOZf0OwzKaNAzeIMp71zQbNl09w== +hermes-estree@0.8.0: + version "0.8.0" + resolved "https://registry.yarnpkg.com/hermes-estree/-/hermes-estree-0.8.0.tgz#530be27243ca49f008381c1f3e8b18fb26bf9ec0" + integrity sha512-W6JDAOLZ5pMPMjEiQGLCXSSV7pIBEgRR5zGkxgmzGSXHOxqV5dC/M1Zevqpbm9TZDE5tu358qZf8Vkzmsc+u7Q== -hermes-parser@0.6.0: - version "0.6.0" - resolved "https://registry.yarnpkg.com/hermes-parser/-/hermes-parser-0.6.0.tgz#00d14e91bca830b3c1457050fa4187400cb96328" - integrity sha512-Vf58jBZca2+QBLR9h7B7mdg8oFz2g5ILz1iVouZ5DOrOrAfBmPfJjdjDT8jrO0f+iJ4/hSRrQHqHIjSnTaLUDQ== +hermes-parser@0.8.0: + version "0.8.0" + resolved "https://registry.yarnpkg.com/hermes-parser/-/hermes-parser-0.8.0.tgz#116dceaba32e45b16d6aefb5c4c830eaeba2d257" + integrity sha512-yZKalg1fTYG5eOiToLUaw69rQfZq/fi+/NtEXRU7N87K/XobNRhRWorh80oSge2lWUiZfTgUvRJH+XgZWrhoqA== dependencies: - hermes-estree "0.6.0" + hermes-estree "0.8.0" hermes-profile-transformer@^0.0.6: version "0.0.6" @@ -3713,30 +3475,11 @@ is-absolute@^1.0.0: is-relative "^1.0.0" is-windows "^1.0.1" -is-accessor-descriptor@^0.1.6: - version "0.1.6" - resolved "https://registry.yarnpkg.com/is-accessor-descriptor/-/is-accessor-descriptor-0.1.6.tgz#a9e12cb3ae8d876727eeef3843f8a0897b5c98d6" - integrity sha1-qeEss66Nh2cn7u84Q/igiXtcmNY= - dependencies: - kind-of "^3.0.2" - -is-accessor-descriptor@^1.0.0: - version "1.0.0" - resolved "https://registry.yarnpkg.com/is-accessor-descriptor/-/is-accessor-descriptor-1.0.0.tgz#169c2f6d3df1f992618072365c9b0ea1f6878656" - integrity sha512-m5hnHTkcVsPfqx3AKlyttIPb7J+XykHvJP2B9bZDjlhLIoEq4XoK64Vg7boZlVWYK6LUY94dYPEE7Lh0ZkZKcQ== - dependencies: - kind-of "^6.0.0" - is-arrayish@^0.2.1: version "0.2.1" resolved "https://registry.yarnpkg.com/is-arrayish/-/is-arrayish-0.2.1.tgz#77c99840527aa8ecb1a8ba697b80645a7a926a9d" integrity sha1-d8mYQFJ6qOyxqLppe4BkWnqSap0= -is-buffer@^1.1.5: - version "1.1.6" - resolved "https://registry.yarnpkg.com/is-buffer/-/is-buffer-1.1.6.tgz#efaa2ea9daa0d7ab2ea13a97b2b8ad51fefbe8be" - integrity sha512-NcdALwpXkTm5Zvvbk7owOUSvVvBKDgKP5/ewfXEznmQFfs4ZRmanOeKBTjRVjka3QFoN6XJ+9F3USqfHqTaU5w== - is-core-module@^2.8.1: version "2.9.0" resolved "https://registry.yarnpkg.com/is-core-module/-/is-core-module-2.9.0.tgz#e1c34429cd51c6dd9e09e0799e396e27b19a9c69" @@ -3744,55 +3487,11 @@ is-core-module@^2.8.1: dependencies: has "^1.0.3" -is-data-descriptor@^0.1.4: - version "0.1.4" - resolved "https://registry.yarnpkg.com/is-data-descriptor/-/is-data-descriptor-0.1.4.tgz#0b5ee648388e2c860282e793f1856fec3f301b56" - integrity sha1-C17mSDiOLIYCgueT8YVv7D8wG1Y= - dependencies: - kind-of "^3.0.2" - -is-data-descriptor@^1.0.0: - version "1.0.0" - resolved 
"https://registry.yarnpkg.com/is-data-descriptor/-/is-data-descriptor-1.0.0.tgz#d84876321d0e7add03990406abbbbd36ba9268c7" - integrity sha512-jbRXy1FmtAoCjQkVmIVYwuuqDFUbaOeDjmed1tOGPrsMhtJA4rD9tkgA0F1qJ3gRFRXcHYVkdeaP50Q5rE/jLQ== - dependencies: - kind-of "^6.0.0" - -is-descriptor@^0.1.0: - version "0.1.6" - resolved "https://registry.yarnpkg.com/is-descriptor/-/is-descriptor-0.1.6.tgz#366d8240dde487ca51823b1ab9f07a10a78251ca" - integrity sha512-avDYr0SB3DwO9zsMov0gKCESFYqCnE4hq/4z3TdUlukEy5t9C0YRq7HLrsN52NAcqXKaepeCD0n+B0arnVG3Hg== - dependencies: - is-accessor-descriptor "^0.1.6" - is-data-descriptor "^0.1.4" - kind-of "^5.0.0" - -is-descriptor@^1.0.0, is-descriptor@^1.0.2: - version "1.0.2" - resolved "https://registry.yarnpkg.com/is-descriptor/-/is-descriptor-1.0.2.tgz#3b159746a66604b04f8c81524ba365c5f14d86ec" - integrity sha512-2eis5WqQGV7peooDyLmNEPUrps9+SXX5c9pL3xEB+4e9HnGuDa7mB7kHxHw4CbqS9k1T2hOH3miL8n8WtiYVtg== - dependencies: - is-accessor-descriptor "^1.0.0" - is-data-descriptor "^1.0.0" - kind-of "^6.0.2" - is-directory@^0.3.1: version "0.3.1" resolved "https://registry.yarnpkg.com/is-directory/-/is-directory-0.3.1.tgz#61339b6f2475fc772fd9c9d83f5c8575dc154ae1" integrity sha1-YTObbyR1/Hcv2cnYP1yFddwVSuE= -is-extendable@^0.1.0, is-extendable@^0.1.1: - version "0.1.1" - resolved "https://registry.yarnpkg.com/is-extendable/-/is-extendable-0.1.1.tgz#62b110e289a471418e3ec36a617d472e301dfc89" - integrity sha1-YrEQ4omkcUGOPsNqYX1HLjAd/Ik= - -is-extendable@^1.0.1: - version "1.0.1" - resolved "https://registry.yarnpkg.com/is-extendable/-/is-extendable-1.0.1.tgz#a7470f9e426733d81bd81e1155264e3a3507cab4" - integrity sha512-arnXMxT1hhoKo9k1LZdmlNyJdDDfy2v0fXjFlmok4+i8ul/6WlbVge9bhM74OpNPQPMGUToDtz+KXa1PneJxOA== - dependencies: - is-plain-object "^2.0.4" - is-extglob@^2.1.1: version "2.1.1" resolved "https://registry.yarnpkg.com/is-extglob/-/is-extglob-2.1.1.tgz#a88c02535791f02ed37c76a1b9ea9773c833f8c2" @@ -3841,13 +3540,6 @@ is-interactive@^1.0.0: resolved "https://registry.yarnpkg.com/is-interactive/-/is-interactive-1.0.0.tgz#cea6e6ae5c870a7b0a0004070b7b587e0252912e" integrity sha512-2HvIEKRoqS62guEC+qBjpvRubdX910WCMuJTZ+I9yvqKU2/12eSL549HMwtabb4oupdj2sMP50k+XJfB/8JE6w== -is-number@^3.0.0: - version "3.0.0" - resolved "https://registry.yarnpkg.com/is-number/-/is-number-3.0.0.tgz#24fd6201a4782cf50561c810276afc7d12d71195" - integrity sha1-JP1iAaR4LPUFYcgQJ2r8fRLXEZU= - dependencies: - kind-of "^3.0.2" - is-number@^7.0.0: version "7.0.0" resolved "https://registry.yarnpkg.com/is-number/-/is-number-7.0.0.tgz#7535345b896734d5f80c4d06c50955527a14f12b" @@ -3863,7 +3555,7 @@ is-path-inside@^3.0.2: resolved "https://registry.yarnpkg.com/is-path-inside/-/is-path-inside-3.0.3.tgz#d231362e53a07ff2b0e0ea7fed049161ffd16283" integrity sha512-Fd4gABb+ycGAmKou8eMftCupSir5lRxqf4aD/vd0cD2qc4HL07OjCeuHMr8Ro4CoMaeCKDB0/ECBOVWjTwUvPQ== -is-plain-object@^2.0.3, is-plain-object@^2.0.4: +is-plain-object@^2.0.4: version "2.0.4" resolved "https://registry.yarnpkg.com/is-plain-object/-/is-plain-object-2.0.4.tgz#2c163b3fafb1b606d9d17928f05c2a1c38e07677" integrity sha512-h5PpgXkWitc38BBMYawTYMWJHFZJVnBquFE57xFpjB8pJFiF6gZ+bU+WyI/yqXiFR5mdLsgYNaPe8uao6Uv9Og== @@ -3909,7 +3601,7 @@ is-unicode-supported@^0.1.0: resolved "https://registry.yarnpkg.com/is-unicode-supported/-/is-unicode-supported-0.1.0.tgz#3f26c76a809593b52bfa2ecb5710ed2779b522a7" integrity sha512-knxG2q4UC3u8stRGyAVJCOdxFmv5DZiRcdlIaAQXAbSfJya+OhopNotLQrstBhququ4ZpuKbDc/8S6mgXgPFPw== -is-windows@^1.0.1, is-windows@^1.0.2: +is-windows@^1.0.1: version "1.0.2" 
resolved "https://registry.yarnpkg.com/is-windows/-/is-windows-1.0.2.tgz#d1850eb9791ecd18e6182ce12a30f396634bb19d" integrity sha512-eXK1UInq2bPmjyX6e3VHIzMLobc4J94i4AWn+Hpq3OU5KkrRC96OAcR3PRJ/pGu6m8TRnBHP9dkXQVsT/COVIA== @@ -3919,7 +3611,7 @@ is-wsl@^1.1.0: resolved "https://registry.yarnpkg.com/is-wsl/-/is-wsl-1.1.0.tgz#1f16e4aa22b04d1336b66188a66af3c600c3a66d" integrity sha1-HxbkqiKwTRM2tmGIpmrzxgDDpm0= -isarray@1.0.0, isarray@~1.0.0: +isarray@~1.0.0: version "1.0.0" resolved "https://registry.yarnpkg.com/isarray/-/isarray-1.0.0.tgz#bb935d48582cba168c06834957a54a3e07124f11" integrity sha1-u5NdSFgsuhaMBoNJV6VKPgcSTxE= @@ -3929,14 +3621,7 @@ isexe@^2.0.0: resolved "https://registry.yarnpkg.com/isexe/-/isexe-2.0.0.tgz#e8fbf374dc556ff8947a10dcb0572d633f2cfa10" integrity sha1-6PvzdNxVb/iUehDcsFctYz8s+hA= -isobject@^2.0.0: - version "2.1.0" - resolved "https://registry.yarnpkg.com/isobject/-/isobject-2.1.0.tgz#f065561096a3f1da2ef46272f815c840d87e0c89" - integrity sha1-8GVWEJaj8dou9GJy+BXIQNh+DIk= - dependencies: - isarray "1.0.0" - -isobject@^3.0.0, isobject@^3.0.1: +isobject@^3.0.1: version "3.0.1" resolved "https://registry.yarnpkg.com/isobject/-/isobject-3.0.1.tgz#4e431e92b11a9731636aa1f9c8d1ccbcfdab78df" integrity sha1-TkMekrEalzFjaqH5yNHMvP2reN8= @@ -4128,7 +3813,7 @@ jest-get-type@^27.5.1: resolved "https://registry.yarnpkg.com/jest-get-type/-/jest-get-type-27.5.1.tgz#3cd613c507b0f7ace013df407a1c1cd578bcb4f1" integrity sha512-2KY95ksYSaK7DMBWQn6dQz3kqAf3BB64y2udeG+hv4KfSOb9qwcYQstTJc1KCbsix+wLZWZYN8t7nwX3GOBLRw== -jest-haste-map@^27.3.1, jest-haste-map@^27.5.1: +jest-haste-map@^27.5.1: version "27.5.1" resolved "https://registry.yarnpkg.com/jest-haste-map/-/jest-haste-map-27.5.1.tgz#9fd8bd7e7b4fa502d9c6164c5640512b4e811e7f" integrity sha512-7GgkZ4Fw4NFbMSDSpZwXeBiIbx+t/46nJ2QitkOjvwPYyZmqttu2TDSimMHP1EkPOi4xUZAN1doE5Vd25H4Jng== @@ -4217,7 +3902,7 @@ jest-pnp-resolver@^1.2.2: resolved "https://registry.yarnpkg.com/jest-pnp-resolver/-/jest-pnp-resolver-1.2.2.tgz#b704ac0ae028a89108a4d040b3f919dfddc8e33c" integrity sha512-olV41bKSMm8BdnuMsewT4jqlZ8+3TCARAXjZGT9jcoSnrfUnRCqnMoF9XEeoWjbzObpqF9dRhHQj0Xb9QdF6/w== -jest-regex-util@^27.5.1: +jest-regex-util@^27.0.6, jest-regex-util@^27.5.1: version "27.5.1" resolved "https://registry.yarnpkg.com/jest-regex-util/-/jest-regex-util-27.5.1.tgz#4da143f7e9fd1e542d4aa69617b38e4a78365b95" integrity sha512-4bfKq2zie+x16okqDXjXn9ql2B0dScQu+vcwe4TvFVhkVyuWLqpZrZtXxLLWoXYgn0E87I6r6GRYHF7wFZBUvg== @@ -4302,7 +3987,7 @@ jest-runtime@^27.5.1: slash "^3.0.0" strip-bom "^4.0.0" -jest-serializer@^27.5.1: +jest-serializer@^27.0.6, jest-serializer@^27.5.1: version "27.5.1" resolved "https://registry.yarnpkg.com/jest-serializer/-/jest-serializer-27.5.1.tgz#81438410a30ea66fd57ff730835123dea1fb1f64" integrity sha512-jZCyo6iIxO1aqUxpuBlwTDMkzOAJS4a3eYz3YzgxxVQFwLeSA7Jfq5cbqCY+JLvTDrWirgusI/0KwxKMgrdf7w== @@ -4338,7 +4023,7 @@ jest-snapshot@^27.5.1: pretty-format "^27.5.1" semver "^7.3.2" -jest-util@^27.5.1: +jest-util@^27.2.0, jest-util@^27.5.1: version "27.5.1" resolved "https://registry.yarnpkg.com/jest-util/-/jest-util-27.5.1.tgz#3ba9771e8e31a0b85da48fe0b0891fb86c01c2f9" integrity sha512-Kv2o/8jNvX1MQ0KGtw480E/w4fBCDOnH6+6DmeKi6LZUIlKA5kwY0YNdlzaWTiVgxqAqik11QyxDOKk543aKXw== @@ -4405,7 +4090,7 @@ jest@^27.4.7: import-local "^3.0.2" jest-cli "^27.5.1" -jetifier@^1.6.2, jetifier@^1.6.6: +jetifier@^1.6.6: version "1.6.8" resolved "https://registry.yarnpkg.com/jetifier/-/jetifier-1.6.8.tgz#e88068697875cbda98c32472902c4d3756247798" integrity 
sha512-3Zi16h6L5tXDRQJTb221cnRoVG9/9OvreLdLU2/ZjRv/GILL+2Cemt0IKvkowwkDpvouAU1DQPOJ7qaiHeIdrw== @@ -4439,10 +4124,15 @@ jsc-android@^250230.2.1: resolved "https://registry.yarnpkg.com/jsc-android/-/jsc-android-250230.2.1.tgz#3790313a970586a03ab0ad47defbc84df54f1b83" integrity sha512-KmxeBlRjwoqCnBBKGsihFtvsBHyUFlBxJPK4FzeYcIuBfdjv6jFys44JITAgSTbQD+vIdwMEfyZklsuQX0yI1Q== -jscodeshift@^0.13.1: - version "0.13.1" - resolved "https://registry.yarnpkg.com/jscodeshift/-/jscodeshift-0.13.1.tgz#69bfe51e54c831296380585c6d9e733512aecdef" - integrity sha512-lGyiEbGOvmMRKgWk4vf+lUrCWO/8YR8sUR3FKF1Cq5fovjZDlIcw3Hu5ppLHAnEXshVffvaM0eyuY/AbOeYpnQ== +jsc-safe-url@^0.2.2: + version "0.2.4" + resolved "https://registry.yarnpkg.com/jsc-safe-url/-/jsc-safe-url-0.2.4.tgz#141c14fbb43791e88d5dc64e85a374575a83477a" + integrity sha512-0wM3YBWtYePOjfyXQH5MWQ8H7sdk5EXSwZvmSLKk2RboVQ2Bu239jycHDz5J/8Blf3K0Qnoy2b6xD+z10MFB+Q== + +jscodeshift@^0.14.0: + version "0.14.0" + resolved "https://registry.yarnpkg.com/jscodeshift/-/jscodeshift-0.14.0.tgz#7542e6715d6d2e8bde0b4e883f0ccea358b46881" + integrity sha512-7eCC1knD7bLUPuSCwXsMZUH51O8jIcoVyKtI6P0XM0IVzlGjckPy3FIwQlorzbN0Sg79oK+RlohN32Mqf/lrYA== dependencies: "@babel/core" "^7.13.16" "@babel/parser" "^7.13.16" @@ -4457,10 +4147,10 @@ jscodeshift@^0.13.1: chalk "^4.1.2" flow-parser "0.*" graceful-fs "^4.2.4" - micromatch "^3.1.10" + micromatch "^4.0.4" neo-async "^2.5.0" node-dir "^0.1.17" - recast "^0.20.4" + recast "^0.21.0" temp "^0.8.4" write-file-atomic "^2.3.0" @@ -4550,26 +4240,7 @@ jsonfile@^6.0.1: optionalDependencies: graceful-fs "^4.1.6" -kind-of@^3.0.2, kind-of@^3.0.3, kind-of@^3.2.0: - version "3.2.2" - resolved "https://registry.yarnpkg.com/kind-of/-/kind-of-3.2.2.tgz#31ea21a734bab9bbb0f32466d893aea51e4a3c64" - integrity sha1-MeohpzS6ubuw8yRm2JOupR5KPGQ= - dependencies: - is-buffer "^1.1.5" - -kind-of@^4.0.0: - version "4.0.0" - resolved "https://registry.yarnpkg.com/kind-of/-/kind-of-4.0.0.tgz#20813df3d712928b207378691a45066fae72dd57" - integrity sha1-IIE989cSkosgc3hpGkUGb65y3Vc= - dependencies: - is-buffer "^1.1.5" - -kind-of@^5.0.0: - version "5.1.0" - resolved "https://registry.yarnpkg.com/kind-of/-/kind-of-5.1.0.tgz#729c91e2d857b7a419a1f9aa65685c4c33f5845d" - integrity sha512-NGEErnH6F2vUuXDh+OlbcKW7/wOcfdRHaZ7VWtqCztfHri/++YKmP51OdWeGPuqCOba6kk2OTe5d02VmTB80Pw== - -kind-of@^6.0.0, kind-of@^6.0.2: +kind-of@^6.0.2: version "6.0.3" resolved "https://registry.yarnpkg.com/kind-of/-/kind-of-6.0.3.tgz#07c05034a6c349fa06e24fa35aa76db4580ce4dd" integrity sha512-dcS1ul+9tmeD95T+x28/ehLgd9mENa3LsvDTtzm3vyBEO7RPptvAD+t44WVXaUjTBRcrpFeFlC8WCruUR456hw== @@ -4636,7 +4307,7 @@ lodash.throttle@^4.1.1: resolved "https://registry.yarnpkg.com/lodash.throttle/-/lodash.throttle-4.1.1.tgz#c23e91b710242ac70c37f1e1cda9274cc39bf2f4" integrity sha1-wj6RtxAkKscMN/HhzaknTMOb8vQ= -lodash@^4.17.15, lodash@^4.7.0: +lodash@^4.7.0: version "4.17.21" resolved "https://registry.yarnpkg.com/lodash/-/lodash-4.17.21.tgz#679591c564c3bffaae8454cf0b3df370c3d6911c" integrity sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg== @@ -4694,18 +4365,6 @@ makeerror@1.0.12: dependencies: tmpl "1.0.5" -map-cache@^0.2.2: - version "0.2.2" - resolved "https://registry.yarnpkg.com/map-cache/-/map-cache-0.2.2.tgz#c32abd0bd6525d9b051645bb4f26ac5dc98a0dbf" - integrity sha1-wyq9C9ZSXZsFFkW7TyasXcmKDb8= - -map-visit@^1.0.0: - version "1.0.0" - resolved "https://registry.yarnpkg.com/map-visit/-/map-visit-1.0.0.tgz#ecdca8f13144e660f1b5bd41f12f3479d98dfb8f" - integrity 
sha1-7Nyo8TFE5mDxtb1B8S80edmN+48= - dependencies: - object-visit "^1.0.0" - memoize-one@^5.0.0: version "5.2.1" resolved "https://registry.yarnpkg.com/memoize-one/-/memoize-one-5.2.1.tgz#8337aa3c4335581839ec01c3d594090cebe8f00e" @@ -4721,76 +4380,95 @@ merge2@^1.3.0, merge2@^1.4.1: resolved "https://registry.yarnpkg.com/merge2/-/merge2-1.4.1.tgz#4368892f885e907455a6fd7dc55c0c9d404990ae" integrity sha512-8q7VEgMJW4J8tcfVPy8g09NcQwZdbwFEqhe/WZkoIzjn/3TGDwtOCYtXGxA3O8tPzpczCCDgv+P2P5y00ZJOOg== -metro-babel-transformer@0.70.3: - version "0.70.3" - resolved "https://registry.yarnpkg.com/metro-babel-transformer/-/metro-babel-transformer-0.70.3.tgz#dca61852be273824a4b641bd1ecafff07ff3ad1f" - integrity sha512-bWhZRMn+mIOR/s3BDpFevWScz9sV8FGktVfMlF1eJBLoX24itHDbXvTktKBYi38PWIKcHedh6THSFpJogfuwNA== +metro-babel-transformer@0.72.4: + version "0.72.4" + resolved "https://registry.yarnpkg.com/metro-babel-transformer/-/metro-babel-transformer-0.72.4.tgz#5149424896797980aa1758c8ef7c9a80f9d0f587" + integrity sha512-cg1TQUKDkKqrIClrqqIGE8ZDa9kRKSjhBtqPtNYt/ZSywXU41SrldfcI5uzPrzcIrYpH5hnN6OCLRACPgy2vsw== dependencies: "@babel/core" "^7.14.0" - hermes-parser "0.6.0" - metro-source-map "0.70.3" + hermes-parser "0.8.0" + metro-source-map "0.72.4" nullthrows "^1.1.1" -metro-cache-key@0.70.3: - version "0.70.3" - resolved "https://registry.yarnpkg.com/metro-cache-key/-/metro-cache-key-0.70.3.tgz#898803db04178a8f440598afba7d82a9cf35abf7" - integrity sha512-0zpw+IcpM3hmGd5sKMdxNv3sbOIUYnMUvx1/yaM6vNRReSPmOLX0bP8fYf3CGgk8NEreZ1OHbVsuw7bdKt40Mw== +metro-cache-key@0.72.4: + version "0.72.4" + resolved "https://registry.yarnpkg.com/metro-cache-key/-/metro-cache-key-0.72.4.tgz#f03d49214554b25968f04dc5e19dfe018cf9312b" + integrity sha512-DH3cgN4L7IKNCVBy8LBOXQ4tHDdvh7Vl7jWNkQKMOfHWu1EwsTtXD/+zdV7/be4ls/kHxrD0HbGzpK8XhUAHSw== -metro-cache@0.70.3: - version "0.70.3" - resolved "https://registry.yarnpkg.com/metro-cache/-/metro-cache-0.70.3.tgz#42cf3cdf8a7b3691f3bef9a86bed38d4c5f6201f" - integrity sha512-iCix/+z812fUqa6KlOxaTkY6LQQDoXIe/VljXkGIvpygSCmYyhjQpfQVZEVVPezFmUBYXNdabdQ6cYx6JX3yMg== +metro-cache@0.72.4: + version "0.72.4" + resolved "https://registry.yarnpkg.com/metro-cache/-/metro-cache-0.72.4.tgz#e0ffb33dd044a7cf5897a09489088a413bfe7468" + integrity sha512-76fi9OVytiFVSuGQcNoquVOT7AENd0q3n1WmyBeJ7jvl/UrE3/NN3HTWzu2ezG5IxF3cmo5q1ehi0NEpgwaFGg== dependencies: - metro-core "0.70.3" + metro-core "0.72.4" rimraf "^2.5.4" -metro-config@0.70.3, metro-config@^0.70.1: - version "0.70.3" - resolved "https://registry.yarnpkg.com/metro-config/-/metro-config-0.70.3.tgz#fe6f7330f679d5594e5724af7a69d4dbe1bb5bc3" - integrity sha512-SSCDjSTygoCgzoj61DdrBeJzZDRwQxUEfcgc6t6coxWSExXNR4mOngz0q4SAam49Bmjq9J2Jft6qUKnUTPrRgA== +metro-config@0.72.4: + version "0.72.4" + resolved "https://registry.yarnpkg.com/metro-config/-/metro-config-0.72.4.tgz#3ad42b3ca0037125d5615f4cb7e1c7ed9442bedd" + integrity sha512-USv+H14D5RrSpfA5t4t5cbF1CnizgYGz6xJ3HB0r/bDYdJdZTVqB3/mMPft7Z5zHslS00JCG7oE51G1CK/FlKw== dependencies: cosmiconfig "^5.0.5" jest-validate "^26.5.2" - metro "0.70.3" - metro-cache "0.70.3" - metro-core "0.70.3" - metro-runtime "0.70.3" + metro "0.72.4" + metro-cache "0.72.4" + metro-core "0.72.4" + metro-runtime "0.72.4" -metro-core@0.70.3, metro-core@^0.70.1: - version "0.70.3" - resolved "https://registry.yarnpkg.com/metro-core/-/metro-core-0.70.3.tgz#bf4dda15a5185f5a7931de463a1b97ac9ef680a0" - integrity sha512-NzfHB/w5R7yLaOeU1tzPTbBzCRsYSvpKJkLMP0yudszKZzIAZqNdjoEJ9GZ688Wi0ynZxcU0BxukXh4my80ZBw== +metro-core@0.72.4: + version 
"0.72.4" + resolved "https://registry.yarnpkg.com/metro-core/-/metro-core-0.72.4.tgz#e4939aef4c50d953c44eee99a3c971d5162f1287" + integrity sha512-2JNT1nG0UV1uMrQHQOKUSII0sdS6MhVT3mBt2kwfjCvD+jvi1iYhKJ4kYCRlUQw9XNLGZ/B+C0VDQzlf2M3zVw== dependencies: - jest-haste-map "^27.3.1" lodash.throttle "^4.1.1" - metro-resolver "0.70.3" + metro-resolver "0.72.4" -metro-hermes-compiler@0.70.3: - version "0.70.3" - resolved "https://registry.yarnpkg.com/metro-hermes-compiler/-/metro-hermes-compiler-0.70.3.tgz#ac7ed656fbcf0a59adcd010d3639e4cfdbc76b4f" - integrity sha512-W6WttLi4E72JL/NyteQ84uxYOFMibe0PUr9aBKuJxxfCq6QRnJKOVcNY0NLW0He2tneXGk+8ZsNz8c0flEvYqg== +metro-file-map@0.72.4: + version "0.72.4" + resolved "https://registry.yarnpkg.com/metro-file-map/-/metro-file-map-0.72.4.tgz#8a0c8a0e44d665af90dded2ac6e01baebff8552e" + integrity sha512-Mv5WgTsYs5svTR/df6jhq2aD4IkAuwV5TutHW0BfEg1YccQt8/v7q5ZypmUOkjdSS9bFR4r3677jalr/ceFypQ== + dependencies: + abort-controller "^3.0.0" + anymatch "^3.0.3" + debug "^2.2.0" + fb-watchman "^2.0.0" + graceful-fs "^4.2.4" + invariant "^2.2.4" + jest-regex-util "^27.0.6" + jest-serializer "^27.0.6" + jest-util "^27.2.0" + jest-worker "^27.2.0" + micromatch "^4.0.4" + walker "^1.0.7" + optionalDependencies: + fsevents "^2.1.2" -metro-inspector-proxy@0.70.3: - version "0.70.3" - resolved "https://registry.yarnpkg.com/metro-inspector-proxy/-/metro-inspector-proxy-0.70.3.tgz#321c25b2261e76d8c4bcc39e092714adfcb50a14" - integrity sha512-qQoNdPGrmyoJSWYkxSDpTaAI8xyqVdNDVVj9KRm1PG8niSuYmrCCFGLLFsMvkVYwsCWUGHoGBx0UoAzVp14ejw== +metro-hermes-compiler@0.72.4: + version "0.72.4" + resolved "https://registry.yarnpkg.com/metro-hermes-compiler/-/metro-hermes-compiler-0.72.4.tgz#06c946d74720d5132fa1690df0610ba367d3436c" + integrity sha512-AY1mAT5FKfDRYCthuKo2XHbuhG5TUV4ZpZlJ8peIgkiWICzfy0tau3yu+3jUD456N90CjMCOmdknji4uKiZ8ww== + +metro-inspector-proxy@0.72.4: + version "0.72.4" + resolved "https://registry.yarnpkg.com/metro-inspector-proxy/-/metro-inspector-proxy-0.72.4.tgz#347e9634b6204c38117292edfb11eb2df71c09ad" + integrity sha512-pr+PsbNCZaStWuJRH8oclT170B7NxfgH+UUyTf9/aR+7PjX0gdDabJhPyzA633QgR+EFBaQKZuetHA+f5/cnEQ== dependencies: connect "^3.6.5" debug "^2.2.0" ws "^7.5.1" yargs "^15.3.1" -metro-minify-uglify@0.70.3: - version "0.70.3" - resolved "https://registry.yarnpkg.com/metro-minify-uglify/-/metro-minify-uglify-0.70.3.tgz#2f28129ca5b8ef958f3e3fcf004c3707c7732e1e" - integrity sha512-oHyjV9WDqOlDE1FPtvs6tIjjeY/oP1PNUPYL1wqyYtqvjN+zzAOrcbsAAL1sv+WARaeiMsWkF2bwtNo+Hghoog== +metro-minify-uglify@0.72.4: + version "0.72.4" + resolved "https://registry.yarnpkg.com/metro-minify-uglify/-/metro-minify-uglify-0.72.4.tgz#b4504adc17f093173c0e5d44df32ac9e13f50a88" + integrity sha512-84Rrgie3O7Dqkak9ep/eIpMZkEFzpKD4bngPUNimYqAMCExKL7/aymydB27gKcqwus/BVkAV+aOnFsuOhlgnQg== dependencies: uglify-es "^3.1.9" -metro-react-native-babel-preset@0.70.3: - version "0.70.3" - resolved "https://registry.yarnpkg.com/metro-react-native-babel-preset/-/metro-react-native-babel-preset-0.70.3.tgz#1c77ec4544ecd5fb6c803e70b21284d7483e4842" - integrity sha512-4Nxc1zEiHEu+GTdEMEsHnRgfaBkg8f/Td3+FcQ8NTSvs+xL3LBrQy6N07idWSQZHIdGFf+tTHvRfSIWLD8u8Tg== +metro-react-native-babel-preset@0.72.4: + version "0.72.4" + resolved "https://registry.yarnpkg.com/metro-react-native-babel-preset/-/metro-react-native-babel-preset-0.72.4.tgz#2b320772d2489d1fb3a6413fc58dad13a56eea0e" + integrity sha512-YGCVaYe1H5fOFktdDdL9IwAyiXjPh1t2eZZFp3KFJak6fxKpN+q5PPhe1kzMa77dbCAqgImv43zkfGa6i27eyA== dependencies: "@babel/core" "^7.14.0" 
"@babel/plugin-proposal-async-generator-functions" "^7.0.0" @@ -4832,63 +4510,64 @@ metro-react-native-babel-preset@0.70.3: "@babel/template" "^7.0.0" react-refresh "^0.4.0" -metro-react-native-babel-transformer@0.70.3, metro-react-native-babel-transformer@^0.70.1: - version "0.70.3" - resolved "https://registry.yarnpkg.com/metro-react-native-babel-transformer/-/metro-react-native-babel-transformer-0.70.3.tgz#195597c32488f820aa9e441bbca7c04fe7de7a2d" - integrity sha512-WKBU6S/G50j9cfmFM4k4oRYprd8u3qjleD4so1E2zbTNILg+gYla7ZFGCAvi2G0ZcqS2XuGCR375c2hF6VVvwg== +metro-react-native-babel-transformer@0.72.4: + version "0.72.4" + resolved "https://registry.yarnpkg.com/metro-react-native-babel-transformer/-/metro-react-native-babel-transformer-0.72.4.tgz#c1a38bf28513374dbb0fce45b4017d8abfe4a071" + integrity sha512-VxM8Cki+/tPAyQRPHEy1bsxAihpxz8cGLdteFo9t0eAJI7/vEegqICxQm4A+RiGQc4f8t2jiwI6YpnDWomI5Gw== dependencies: "@babel/core" "^7.14.0" babel-preset-fbjs "^3.4.0" - hermes-parser "0.6.0" - metro-babel-transformer "0.70.3" - metro-react-native-babel-preset "0.70.3" - metro-source-map "0.70.3" + hermes-parser "0.8.0" + metro-babel-transformer "0.72.4" + metro-react-native-babel-preset "0.72.4" + metro-source-map "0.72.4" nullthrows "^1.1.1" -metro-resolver@0.70.3, metro-resolver@^0.70.1: - version "0.70.3" - resolved "https://registry.yarnpkg.com/metro-resolver/-/metro-resolver-0.70.3.tgz#c64fdd6d0a88fa62f3f99f87e539b5f603bd47bf" - integrity sha512-5Pc5S/Gs4RlLbziuIWtvtFd9GRoILlaRC8RZDVq5JZWcWHywKy/PjNmOBNhpyvtRlzpJfy/ssIfLhu8zINt1Mw== +metro-resolver@0.72.4: + version "0.72.4" + resolved "https://registry.yarnpkg.com/metro-resolver/-/metro-resolver-0.72.4.tgz#37893ff72273a2b7ea529564caa15fe2e2337267" + integrity sha512-aHxq/jypzGyi9Ic9woe//RymfxpzWliAkyTmBWPHE9ypGoiobstK0me2j5XuSfzASzCU8wcVt20qy870rxTWLw== dependencies: absolute-path "^0.0.0" -metro-runtime@0.70.3, metro-runtime@^0.70.1: - version "0.70.3" - resolved "https://registry.yarnpkg.com/metro-runtime/-/metro-runtime-0.70.3.tgz#09231b9d05dcbdfb5a13df0a45307273e6fe1168" - integrity sha512-22xU7UdXZacniTIDZgN2EYtmfau2pPyh97Dcs+cWrLcJYgfMKjWBtesnDcUAQy3PHekDYvBdJZkoQUeskYTM+w== +metro-runtime@0.72.4: + version "0.72.4" + resolved "https://registry.yarnpkg.com/metro-runtime/-/metro-runtime-0.72.4.tgz#b3469fd040a9526bfd897c0517c5f052a059ddeb" + integrity sha512-EA0ltqyYFpjOdpoRqE2U9FJleqTOIK+ZLRlLaDrx4yz3zTqUZ16W6w71dq+qrwD8BPg7bPKQu7RluU3K6tI79A== dependencies: "@babel/runtime" "^7.0.0" + react-refresh "^0.4.0" -metro-source-map@0.70.3: - version "0.70.3" - resolved "https://registry.yarnpkg.com/metro-source-map/-/metro-source-map-0.70.3.tgz#f5976108c18d4661eaa4d188c96713e5d67a903b" - integrity sha512-zsYtZGrwRbbGEFHtmMqqeCH9K9aTGNVPsurMOWCUeQA3VGyVGXPGtLMC+CdAM9jLpUyg6jw2xh0esxi+tYH7Uw== +metro-source-map@0.72.4: + version "0.72.4" + resolved "https://registry.yarnpkg.com/metro-source-map/-/metro-source-map-0.72.4.tgz#3c6444bba22b84d7d7e383f784a1d59e724192de" + integrity sha512-P09aMDEPkLo6BM8VYYoTsH/2B1w6t+mrCwNcNJV1zE+57FPiU4fSBlSeM8G9YeYaezDTHimS2JlMozP+2r+trA== dependencies: "@babel/traverse" "^7.14.0" "@babel/types" "^7.0.0" invariant "^2.2.4" - metro-symbolicate "0.70.3" + metro-symbolicate "0.72.4" nullthrows "^1.1.1" - ob1 "0.70.3" + ob1 "0.72.4" source-map "^0.5.6" vlq "^1.0.0" -metro-symbolicate@0.70.3: - version "0.70.3" - resolved "https://registry.yarnpkg.com/metro-symbolicate/-/metro-symbolicate-0.70.3.tgz#b039e5629c4ed0c999ea0496d580e1c98260f5cb" - integrity 
sha512-JTYkF1dpeDUssQ84juE1ycnhHki2ylJBBdJE1JHtfu5oC+z1ElDbBdPHq90Uvt8HbRov/ZAnxvv7Zy6asS+WCA== +metro-symbolicate@0.72.4: + version "0.72.4" + resolved "https://registry.yarnpkg.com/metro-symbolicate/-/metro-symbolicate-0.72.4.tgz#3be7c9d1f382fc58198efcb515f2de0ec3fc4181" + integrity sha512-6ZRo66Q4iKiwaQuHjmogkSCCqaSpJ4QzbHsVHRUe57mFIL34lOLYp7aPfmX7NHCmy061HhDox/kGuYZQRmHB3A== dependencies: invariant "^2.2.4" - metro-source-map "0.70.3" + metro-source-map "0.72.4" nullthrows "^1.1.1" source-map "^0.5.6" through2 "^2.0.1" vlq "^1.0.0" -metro-transform-plugins@0.70.3: - version "0.70.3" - resolved "https://registry.yarnpkg.com/metro-transform-plugins/-/metro-transform-plugins-0.70.3.tgz#7fe87cd0d8979b4d5d6e375751d86188fff38fd9" - integrity sha512-dQRIJoTkWZN2IVS2KzgS1hs7ZdHDX3fS3esfifPkqFAEwHiLctCf0EsPgIknp0AjMLvmGWfSLJigdRB/dc0ASw== +metro-transform-plugins@0.72.4: + version "0.72.4" + resolved "https://registry.yarnpkg.com/metro-transform-plugins/-/metro-transform-plugins-0.72.4.tgz#01e95aa277216fb0887610067125fac9271d399e" + integrity sha512-yxB4v/LxQkmN1rjyyeLiV4x+jwCmId4FTTxNrmTYoi0tFPtOBOeSwuqY08LjxZQMJdZOKXqj2bgIewqFXJEkGw== dependencies: "@babel/core" "^7.14.0" "@babel/generator" "^7.14.0" @@ -4896,29 +4575,29 @@ metro-transform-plugins@0.70.3: "@babel/traverse" "^7.14.0" nullthrows "^1.1.1" -metro-transform-worker@0.70.3: - version "0.70.3" - resolved "https://registry.yarnpkg.com/metro-transform-worker/-/metro-transform-worker-0.70.3.tgz#62bfa28ebef98803531c4bcb558de5fc804c94ef" - integrity sha512-MtVVsnHhhBOp9GRLCdAb2mD1dTCsIzT4+m34KMRdBDCEbDIb90YafT5prpU8qbj5uKd0o2FOQdrJ5iy5zQilHw== +metro-transform-worker@0.72.4: + version "0.72.4" + resolved "https://registry.yarnpkg.com/metro-transform-worker/-/metro-transform-worker-0.72.4.tgz#356903c343dc62373b928b4325ad09a103398cc5" + integrity sha512-mIvzy6nRQKMALEdF5g8LXPgCOUi/tGESE5dlb7OSMCj2FAFBm3mTLRrpW5phzK/J6Wg+4Vb9PMS+wGbXR261rA== dependencies: "@babel/core" "^7.14.0" "@babel/generator" "^7.14.0" "@babel/parser" "^7.14.0" "@babel/types" "^7.0.0" babel-preset-fbjs "^3.4.0" - metro "0.70.3" - metro-babel-transformer "0.70.3" - metro-cache "0.70.3" - metro-cache-key "0.70.3" - metro-hermes-compiler "0.70.3" - metro-source-map "0.70.3" - metro-transform-plugins "0.70.3" + metro "0.72.4" + metro-babel-transformer "0.72.4" + metro-cache "0.72.4" + metro-cache-key "0.72.4" + metro-hermes-compiler "0.72.4" + metro-source-map "0.72.4" + metro-transform-plugins "0.72.4" nullthrows "^1.1.1" -metro@0.70.3, metro@^0.70.1: - version "0.70.3" - resolved "https://registry.yarnpkg.com/metro/-/metro-0.70.3.tgz#4290f538ab5446c7050e718b5c5823eea292c5c2" - integrity sha512-uEWS7xg8oTetQDABYNtsyeUjdLhH3KAvLFpaFFoJqUpOk2A3iygszdqmjobFl6W4zrvKDJS+XxdMR1roYvUhTw== +metro@0.72.4: + version "0.72.4" + resolved "https://registry.yarnpkg.com/metro/-/metro-0.72.4.tgz#fdfc43b3329388b5a3e8856727403f93a8c05250" + integrity sha512-UBqL2fswJjsq2LlfMPV4ArqzLzjyN0nReKRijP3DdSxZiaJDG4NC9sQoVJHbH1HP5qXQMAK/SftyAx1c1kuy+w== dependencies: "@babel/code-frame" "^7.0.0" "@babel/core" "^7.14.0" @@ -4938,27 +4617,28 @@ metro@0.70.3, metro@^0.70.1: error-stack-parser "^2.0.6" fs-extra "^1.0.0" graceful-fs "^4.2.4" - hermes-parser "0.6.0" + hermes-parser "0.8.0" image-size "^0.6.0" invariant "^2.2.4" - jest-haste-map "^27.3.1" jest-worker "^27.2.0" + jsc-safe-url "^0.2.2" lodash.throttle "^4.1.1" - metro-babel-transformer "0.70.3" - metro-cache "0.70.3" - metro-cache-key "0.70.3" - metro-config "0.70.3" - metro-core "0.70.3" - metro-hermes-compiler "0.70.3" - 
metro-inspector-proxy "0.70.3" - metro-minify-uglify "0.70.3" - metro-react-native-babel-preset "0.70.3" - metro-resolver "0.70.3" - metro-runtime "0.70.3" - metro-source-map "0.70.3" - metro-symbolicate "0.70.3" - metro-transform-plugins "0.70.3" - metro-transform-worker "0.70.3" + metro-babel-transformer "0.72.4" + metro-cache "0.72.4" + metro-cache-key "0.72.4" + metro-config "0.72.4" + metro-core "0.72.4" + metro-file-map "0.72.4" + metro-hermes-compiler "0.72.4" + metro-inspector-proxy "0.72.4" + metro-minify-uglify "0.72.4" + metro-react-native-babel-preset "0.72.4" + metro-resolver "0.72.4" + metro-runtime "0.72.4" + metro-source-map "0.72.4" + metro-symbolicate "0.72.4" + metro-transform-plugins "0.72.4" + metro-transform-worker "0.72.4" mime-types "^2.1.27" node-fetch "^2.2.0" nullthrows "^1.1.1" @@ -4971,25 +4651,6 @@ metro@0.70.3, metro@^0.70.1: ws "^7.5.1" yargs "^15.3.1" -micromatch@^3.1.10: - version "3.1.10" - resolved "https://registry.yarnpkg.com/micromatch/-/micromatch-3.1.10.tgz#70859bc95c9840952f359a068a3fc49f9ecfac23" - integrity sha512-MWikgl9n9M3w+bpsY3He8L+w9eF9338xRl8IAO5viDizwSzziFEyUzo2xrrloB64ADbTf8uA8vRqqttDTOmccg== - dependencies: - arr-diff "^4.0.0" - array-unique "^0.3.2" - braces "^2.3.1" - define-property "^2.0.2" - extend-shallow "^3.0.2" - extglob "^2.0.4" - fragment-cache "^0.2.1" - kind-of "^6.0.2" - nanomatch "^1.2.9" - object.pick "^1.3.0" - regex-not "^1.0.0" - snapdragon "^0.8.1" - to-regex "^3.0.2" - micromatch@^4.0.4: version "4.0.5" resolved "https://registry.yarnpkg.com/micromatch/-/micromatch-4.0.5.tgz#bc8999a7cbbf77cdc89f132f6e467051b49090c6" @@ -5032,24 +4693,11 @@ minimatch@^3.0.2, minimatch@^3.0.4, minimatch@^3.1.1: dependencies: brace-expansion "^1.1.7" -minimist@^1.2.0: - version "1.2.7" - resolved "https://registry.yarnpkg.com/minimist/-/minimist-1.2.7.tgz#daa1c4d91f507390437c6a8bc01078e7000c4d18" - integrity sha512-bzfL1YUZsP41gmu/qjrEk0Q6i2ix/cVeAhbCbqH9u3zYutS1cLg00qhrD0M2MVdCcx4Sc0UpP2eBWo9rotpq6g== - minimist@^1.2.6: version "1.2.6" resolved "https://registry.yarnpkg.com/minimist/-/minimist-1.2.6.tgz#8637a5b759ea0d6e98702cfb3a9283323c93af44" integrity sha512-Jsjnk4bw3YJqYzbdyBiNsPWHPfO++UGG749Cxs6peCu5Xg4nrena6OVxOYxrQTqww0Jmwt+Ref8rggumkTLz9Q== -mixin-deep@^1.2.0: - version "1.3.2" - resolved "https://registry.yarnpkg.com/mixin-deep/-/mixin-deep-1.3.2.tgz#1120b43dc359a785dce65b55b82e257ccf479566" - integrity sha512-WRoDn//mXBiJ1H40rqa3vH0toePwSsGb45iInWlTySa+Uu4k3tYUSxa2v1KqAiLtvlrSzaExqS1gtk96A9zvEA== - dependencies: - for-in "^1.0.2" - is-extendable "^1.0.1" - mkdirp@^0.5.1: version "0.5.6" resolved "https://registry.yarnpkg.com/mkdirp/-/mkdirp-0.5.6.tgz#7def03d2432dcae4ba1d611445c48396062255f6" @@ -5072,23 +4720,6 @@ ms@2.1.3: resolved "https://registry.yarnpkg.com/ms/-/ms-2.1.3.tgz#574c8138ce1d2b5861f0b44579dbadd60c6615b2" integrity sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA== -nanomatch@^1.2.9: - version "1.2.13" - resolved "https://registry.yarnpkg.com/nanomatch/-/nanomatch-1.2.13.tgz#b87a8aa4fc0de8fe6be88895b38983ff265bd119" - integrity sha512-fpoe2T0RbHwBTBUOftAfBPaDEi06ufaUai0mE6Yn1kacc3SnTErfb/h+X94VXzI64rKFHYImXSvdwGGCmwOqCA== - dependencies: - arr-diff "^4.0.0" - array-unique "^0.3.2" - define-property "^2.0.2" - extend-shallow "^3.0.2" - fragment-cache "^0.2.1" - is-windows "^1.0.2" - kind-of "^6.0.2" - object.pick "^1.3.0" - regex-not "^1.0.0" - snapdragon "^0.8.1" - to-regex "^3.0.1" - natural-compare@^1.4.0: version "1.4.0" resolved 
"https://registry.yarnpkg.com/natural-compare/-/natural-compare-1.4.0.tgz#4abebfeed7541f2c27acfb29bdbbd15c8d5ba4f7" @@ -5172,37 +4803,21 @@ nwsapi@^2.2.0: resolved "https://registry.yarnpkg.com/nwsapi/-/nwsapi-2.2.0.tgz#204879a9e3d068ff2a55139c2c772780681a38b7" integrity sha512-h2AatdwYH+JHiZpv7pt/gSX1XoRGb7L/qSIeuqA6GwYoF9w1vP1cw42TO0aI2pNyshRK5893hNSl+1//vHK7hQ== -ob1@0.70.3: - version "0.70.3" - resolved "https://registry.yarnpkg.com/ob1/-/ob1-0.70.3.tgz#f48cd5a5abf54b0c423b1b06b6d4ff4d049816cb" - integrity sha512-Vy9GGhuXgDRY01QA6kdhToPd8AkLdLpX9GjH5kpqluVqTu70mgOm7tpGoJDZGaNbr9nJlJgnipqHJQRPORixIQ== +ob1@0.72.4: + version "0.72.4" + resolved "https://registry.yarnpkg.com/ob1/-/ob1-0.72.4.tgz#d2ddedb09fb258d69490e8809157518a62b75506" + integrity sha512-/iPJKpXpVEZS0subUvjew4ept5LTBxj1hD20A4mAj9CJkGGPgvbBlfYtFEBubBkk4dv4Ef5lajsnRBYPxF74cQ== object-assign@^4.1.1: version "4.1.1" resolved "https://registry.yarnpkg.com/object-assign/-/object-assign-4.1.1.tgz#2109adc7965887cfc05cbbd442cac8bfbb360863" integrity sha1-IQmtx5ZYh8/AXLvUQsrIv7s2CGM= -object-copy@^0.1.0: - version "0.1.0" - resolved "https://registry.yarnpkg.com/object-copy/-/object-copy-0.1.0.tgz#7e7d858b781bd7c991a41ba975ed3812754e998c" - integrity sha1-fn2Fi3gb18mRpBupde04EnVOmYw= - dependencies: - copy-descriptor "^0.1.0" - define-property "^0.2.5" - kind-of "^3.0.3" - object-keys@^1.1.1: version "1.1.1" resolved "https://registry.yarnpkg.com/object-keys/-/object-keys-1.1.1.tgz#1c47f272df277f3b1daf061677d9c82e2322c60e" integrity sha512-NuAESUOUMrlIXOfHKzD6bpPu3tYt3xvjNdRIQ+FeT0lNb4K8WR70CaDxhuNguS2XG+GjkyMwOzsN5ZktImfhLA== -object-visit@^1.0.0: - version "1.0.1" - resolved "https://registry.yarnpkg.com/object-visit/-/object-visit-1.0.1.tgz#f79c4493af0c5377b59fe39d395e41042dd045bb" - integrity sha1-95xEk68MU3e1n+OdOV5BBC3QRbs= - dependencies: - isobject "^3.0.0" - object.assign@^4.1.0: version "4.1.2" resolved "https://registry.yarnpkg.com/object.assign/-/object.assign-4.1.2.tgz#0ed54a342eceb37b38ff76eb831a0e788cb63940" @@ -5213,13 +4828,6 @@ object.assign@^4.1.0: has-symbols "^1.0.1" object-keys "^1.1.1" -object.pick@^1.3.0: - version "1.3.0" - resolved "https://registry.yarnpkg.com/object.pick/-/object.pick-1.3.0.tgz#87a10ac4c1694bd2e1cbf53591a66141fb5dd747" - integrity sha1-h6EKxMFpS9Lhy/U1kaZhQftd10c= - dependencies: - isobject "^3.0.1" - on-finished@2.4.1: version "2.4.1" resolved "https://registry.yarnpkg.com/on-finished/-/on-finished-2.4.1.tgz#58c8c44116e54845ad57f14ab10b03533184ac3f" @@ -5382,11 +4990,6 @@ parseurl@~1.3.3: resolved "https://registry.yarnpkg.com/parseurl/-/parseurl-1.3.3.tgz#9da19e7bee8d12dff0513ed5b76957793bc2e8d4" integrity sha512-CiyeOxFT/JZyN5m0z9PfXw4SCBJ6Sygz1Dpl0wqjlhDEGGBP1GnsUVEL0p63hoG1fcj3fHynXi9NYO4nWOL+qQ== -pascalcase@^0.1.1: - version "0.1.1" - resolved "https://registry.yarnpkg.com/pascalcase/-/pascalcase-0.1.1.tgz#b363e55e8006ca6fe21784d2db22bd15d7917f14" - integrity sha1-s2PlXoAGym/iF4TS2yK9FdeRfxQ= - path-exists@^3.0.0: version "3.0.0" resolved "https://registry.yarnpkg.com/path-exists/-/path-exists-3.0.0.tgz#ce0ebeaa5f78cb18925ea7d810d7b59b010fd515" @@ -5456,14 +5059,6 @@ pkg-dir@^4.2.0: dependencies: find-up "^4.0.0" -plist@^3.0.2: - version "3.0.5" - resolved "https://registry.yarnpkg.com/plist/-/plist-3.0.5.tgz#2cbeb52d10e3cdccccf0c11a63a85d830970a987" - integrity sha512-83vX4eYdQp3vP9SxuYgEM/G/pJQqLUz/V/xzPrzruLs7fz7jxGQ1msZ/mg1nwZxUSuOp4sb+/bEIbRrbzZRxDA== - dependencies: - base64-js "^1.5.1" - xmlbuilder "^9.0.7" - plist@^3.0.5: version "3.0.6" resolved 
"https://registry.yarnpkg.com/plist/-/plist-3.0.6.tgz#7cfb68a856a7834bca6dbfe3218eb9c7740145d3" @@ -5477,11 +5072,6 @@ pod-install@^0.1.36: resolved "https://registry.yarnpkg.com/pod-install/-/pod-install-0.1.36.tgz#8090f57f76b42acf24c4325711bfa5730613fcab" integrity sha512-r+f2SAqtM4K81Wv4OAhedbfCtBZhPz+1c1CVkQ1vT33PGAhM5A6GpaJ0pfQVgfZ0tUwagWxlDQ3eOprNyIFO6w== -posix-character-classes@^0.1.0: - version "0.1.1" - resolved "https://registry.yarnpkg.com/posix-character-classes/-/posix-character-classes-0.1.1.tgz#01eac0fe3b5af71a2a6c02feabb8c1fef7e00eab" - integrity sha1-AerA/jta9xoqbAL+q7jB/vfgDqs= - prelude-ls@~1.1.2: version "1.1.2" resolved "https://registry.yarnpkg.com/prelude-ls/-/prelude-ls-1.1.2.tgz#21932a549f5e52ffd9a827f570e04be62a97da54" @@ -5516,10 +5106,10 @@ process-nextick-args@~2.0.0: resolved "https://registry.yarnpkg.com/process-nextick-args/-/process-nextick-args-2.0.1.tgz#7820d9b16120cc55ca9ae7792680ae7dba6d7fe2" integrity sha512-3ouUOpQhtgrbOa17J7+uxOTpITYWaGP7/AhoR3+A+/1e9skrzelGi/dXzEYyvbxubEF6Wn2ypscTKiKJFFn1ag== -promise@^8.2.0: - version "8.2.0" - resolved "https://registry.yarnpkg.com/promise/-/promise-8.2.0.tgz#a1f6280ab67457fbfc8aad2b198c9497e9e5c806" - integrity sha512-+CMAlLHqwRYwBMXKCP+o8ns7DN+xHDUiI+0nArsiJ9y+kJVPLFxEaSw6Ha9s9H0tftxg2Yzl25wqj9G7m5wLZg== +promise@^8.3.0: + version "8.3.0" + resolved "https://registry.yarnpkg.com/promise/-/promise-8.3.0.tgz#8cb333d1edeb61ef23869fbb8a4ea0279ab60e0a" + integrity sha512-rZPNPKTOYVNEEKFaq1HqTgOwZD+4/YHS5ukLzQCypkj+OkYx7iv0mA91lJlpPPZ8vMau3IIGj5Qlwrx+8iiSmg== dependencies: asap "~2.0.6" @@ -5564,10 +5154,10 @@ range-parser@~1.2.1: resolved "https://registry.yarnpkg.com/range-parser/-/range-parser-1.2.1.tgz#3cf37023d199e1c24d1a55b84800c2f3e6468031" integrity sha512-Hrgsx+orqoygnmhFbKaHE6c296J+HTAQXoxEF6gNupROmmGJRoyzfG3ccAveqCBrwr/2yxQ5BVd/GTl5agOwSg== -react-devtools-core@4.24.0: - version "4.24.0" - resolved "https://registry.yarnpkg.com/react-devtools-core/-/react-devtools-core-4.24.0.tgz#7daa196bdc64f3626b3f54f2ff2b96f7c4fdf017" - integrity sha512-Rw7FzYOOzcfyUPaAm9P3g0tFdGqGq2LLiAI+wjYcp6CsF3DeeMrRS3HZAho4s273C29G/DJhx0e8BpRE/QZNGg== +react-devtools-core@4.27.7: + version "4.27.7" + resolved "https://registry.yarnpkg.com/react-devtools-core/-/react-devtools-core-4.27.7.tgz#458a6541483078d60a036c75bf88f54c478086ec" + integrity sha512-12N0HrhCPbD76Z7SkyJdGdXdPGouUsgV6tlEsbSpAnLDO06tjXZP+irht4wPdYwJAJRQ85DxL48eQoz7UmrSuQ== dependencies: shell-quote "^1.6.1" ws "^7" @@ -5609,30 +5199,30 @@ react-native-builder-bob@^0.18.2: optionalDependencies: jetifier "^1.6.6" -react-native-codegen@^0.69.2: - version "0.69.2" - resolved "https://registry.yarnpkg.com/react-native-codegen/-/react-native-codegen-0.69.2.tgz#e33ac3b1486de59ddae687b731ddbfcef8af0e4e" - integrity sha512-yPcgMHD4mqLbckqnWjFBaxomDnBREfRjDi2G/WxNyPBQLD+PXUEmZTkDx6QoOXN+Bl2SkpnNOSsLE2+/RUHoPw== +react-native-codegen@^0.70.7: + version "0.70.7" + resolved "https://registry.yarnpkg.com/react-native-codegen/-/react-native-codegen-0.70.7.tgz#8f6b47a88740ae703209d57b7605538d86dacfa6" + integrity sha512-qXE8Jrhc9BmxDAnCmrHFDLJrzgjsE/mH57dtC4IO7K76AwagdXNCMRp5SA8XdHJzvvHWRaghpiFHEMl9TtOBcQ== dependencies: "@babel/parser" "^7.14.0" flow-parser "^0.121.0" - jscodeshift "^0.13.1" + jscodeshift "^0.14.0" nullthrows "^1.1.1" -react-native-gradle-plugin@^0.0.7: - version "0.0.7" - resolved "https://registry.yarnpkg.com/react-native-gradle-plugin/-/react-native-gradle-plugin-0.0.7.tgz#96602f909745239deab7b589443f14fce5da2056" - integrity 
sha512-+4JpbIx42zGTONhBTIXSyfyHICHC29VTvhkkoUOJAh/XHPEixpuBduYgf6Y4y9wsN1ARlQhBBoptTvXvAFQf5g== +react-native-gradle-plugin@^0.70.3: + version "0.70.3" + resolved "https://registry.yarnpkg.com/react-native-gradle-plugin/-/react-native-gradle-plugin-0.70.3.tgz#cbcf0619cbfbddaa9128701aa2d7b4145f9c4fc8" + integrity sha512-oOanj84fJEXUg9FoEAQomA8ISG+DVIrTZ3qF7m69VQUJyOGYyDZmPqKcjvRku4KXlEH6hWO9i4ACLzNBh8gC0A== -react-native@^0.69.7: - version "0.69.7" - resolved "https://registry.yarnpkg.com/react-native/-/react-native-0.69.7.tgz#891ba4ed7722f1ab570099ce097c355bef8ceb05" - integrity sha512-T3z2utgRcE/+mMML3Wg4vvpnFoGWJcqWskq+8vdFS4ASM1zYg5Hab5vPlKZp9uncD8weYiGsYwkWXzrvZrsayQ== +react-native@^0.70.15: + version "0.70.15" + resolved "https://registry.yarnpkg.com/react-native/-/react-native-0.70.15.tgz#65f2c5c399ff8e2a892cef9b094cc0888653a874" + integrity sha512-pm2ZPpA+m0Kl0THAy2fptnp7B9+QPexpfad9fSXfqjPufrXG2alwW8kYCn2EO5ZUX6bomZjFEswz6RzdRN/p9A== dependencies: "@jest/create-cache-key-function" "^27.0.1" - "@react-native-community/cli" "^8.0.4" - "@react-native-community/cli-platform-android" "^8.0.4" - "@react-native-community/cli-platform-ios" "^8.0.4" + "@react-native-community/cli" "9.3.5" + "@react-native-community/cli-platform-android" "9.3.4" + "@react-native-community/cli-platform-ios" "9.3.0" "@react-native/assets" "1.0.0" "@react-native/normalize-color" "2.0.0" "@react-native/polyfills" "2.0.0" @@ -5640,24 +5230,23 @@ react-native@^0.69.7: anser "^1.4.9" base64-js "^1.1.2" event-target-shim "^5.0.1" - hermes-engine "~0.11.0" invariant "^2.2.4" jsc-android "^250230.2.1" memoize-one "^5.0.0" - metro-react-native-babel-transformer "0.70.3" - metro-runtime "0.70.3" - metro-source-map "0.70.3" + metro-react-native-babel-transformer "0.72.4" + metro-runtime "0.72.4" + metro-source-map "0.72.4" mkdirp "^0.5.1" nullthrows "^1.1.1" pretty-format "^26.5.2" - promise "^8.2.0" - react-devtools-core "4.24.0" - react-native-codegen "^0.69.2" - react-native-gradle-plugin "^0.0.7" + promise "^8.3.0" + react-devtools-core "4.27.7" + react-native-codegen "^0.70.7" + react-native-gradle-plugin "^0.70.3" react-refresh "^0.4.0" - react-shallow-renderer "16.15.0" + react-shallow-renderer "^16.15.0" regenerator-runtime "^0.13.2" - scheduler "^0.21.0" + scheduler "^0.22.0" stacktrace-parser "^0.1.3" use-sync-external-store "^1.0.0" whatwg-fetch "^3.0.0" @@ -5668,7 +5257,7 @@ react-refresh@^0.4.0: resolved "https://registry.yarnpkg.com/react-refresh/-/react-refresh-0.4.3.tgz#966f1750c191672e76e16c2efa569150cc73ab53" integrity sha512-Hwln1VNuGl/6bVwnd0Xdn1e84gT/8T9aYNL+HAKDArLCS7LWjwr7StE30IEYbIkx0Vi3vs+coQxe+SQDbGbbpA== -react-shallow-renderer@16.15.0: +react-shallow-renderer@^16.15.0: version "16.15.0" resolved "https://registry.yarnpkg.com/react-shallow-renderer/-/react-shallow-renderer-16.15.0.tgz#48fb2cf9b23d23cde96708fe5273a7d3446f4457" integrity sha512-oScf2FqQ9LFVQgA73vr86xl2NaOIX73rh+YFqcOp68CWj56tSfgtGKrEbyhCj0rSijyG9M1CYprTh39fBi5hzA== @@ -5710,12 +5299,12 @@ readline@^1.3.0: resolved "https://registry.yarnpkg.com/readline/-/readline-1.3.0.tgz#c580d77ef2cfc8752b132498060dc9793a7ac01c" integrity sha1-xYDXfvLPyHUrEySYBg3JeTp6wBw= -recast@^0.20.4: - version "0.20.5" - resolved "https://registry.yarnpkg.com/recast/-/recast-0.20.5.tgz#8e2c6c96827a1b339c634dd232957d230553ceae" - integrity sha512-E5qICoPoNL4yU0H0NoBDntNB0Q5oMSNh9usFctYniLBluTthi3RsQVBXIJNbApOlvSwW/RGxIuokPcAc59J5fQ== +recast@^0.21.0: + version "0.21.5" + resolved 
"https://registry.yarnpkg.com/recast/-/recast-0.21.5.tgz#e8cd22bb51bcd6130e54f87955d33a2b2e57b495" + integrity sha512-hjMmLaUXAm1hIuTqOdeYObMslq/q+Xff6QE3Y2P+uoHAg2nmVlLBps2hzh1UJDdMtDTMXOFewK6ky51JQIeECg== dependencies: - ast-types "0.14.2" + ast-types "0.15.2" esprima "~4.0.0" source-map "~0.6.1" tslib "^2.0.1" @@ -5744,14 +5333,6 @@ regenerator-transform@^0.15.0: dependencies: "@babel/runtime" "^7.8.4" -regex-not@^1.0.0, regex-not@^1.0.2: - version "1.0.2" - resolved "https://registry.yarnpkg.com/regex-not/-/regex-not-1.0.2.tgz#1f4ece27e00b0b65e0247a6810e6a85d83a5752c" - integrity sha512-J6SDjUgDxQj5NusnOtdFxDwN/+HWykR8GELwctJ7mdqhcyy1xEc4SRFHUXvxTp661YaVKAjfRLZ9cCqS6tn32A== - dependencies: - extend-shallow "^3.0.2" - safe-regex "^1.1.0" - regexpu-core@^5.0.1: version "5.0.1" resolved "https://registry.yarnpkg.com/regexpu-core/-/regexpu-core-5.0.1.tgz#c531122a7840de743dcf9c83e923b5560323ced3" @@ -5788,16 +5369,6 @@ regjsparser@^0.8.2: dependencies: jsesc "~0.5.0" -repeat-element@^1.1.2: - version "1.1.4" - resolved "https://registry.yarnpkg.com/repeat-element/-/repeat-element-1.1.4.tgz#be681520847ab58c7568ac75fbfad28ed42d39e9" - integrity sha512-LFiNfRcSu7KK3evMyYOuCzv3L10TW7yC1G2/+StMjK8Y6Vqd2MG7r/Qjw4ghtuCOjFvlnms/iMmLqpvW/ES/WQ== - -repeat-string@^1.6.1: - version "1.6.1" - resolved "https://registry.yarnpkg.com/repeat-string/-/repeat-string-1.6.1.tgz#8dcae470e1c88abc2d600fff4a776286da75e637" - integrity sha1-jcrkcOHIirwtYA//Sndihtp15jc= - require-directory@^2.1.1: version "2.1.1" resolved "https://registry.yarnpkg.com/require-directory/-/require-directory-2.1.1.tgz#8c64ad5fd30dab1c976e2344ffe7f792a6a6df42" @@ -5835,11 +5406,6 @@ resolve-from@^5.0.0: resolved "https://registry.yarnpkg.com/resolve-from/-/resolve-from-5.0.0.tgz#c35225843df8f776df21c57557bc087e9dfdfc69" integrity sha512-qYg9KP24dD5qka9J47d0aVky0N+b4fTU89LN9iDnjB5waksiC49rvMB0PrUJQGoTmH50XPiqOvAjDfaijGxYZw== -resolve-url@^0.2.1: - version "0.2.1" - resolved "https://registry.yarnpkg.com/resolve-url/-/resolve-url-0.2.1.tgz#2c637fe77c893afd2a663fe21aa9080068e2052a" - integrity sha1-LGN/53yJOv0qZj/iGqkIAGjiBSo= - resolve.exports@^1.1.0: version "1.1.0" resolved "https://registry.yarnpkg.com/resolve.exports/-/resolve.exports-1.1.0.tgz#5ce842b94b05146c0e03076985d1d0e7e48c90c9" @@ -5862,11 +5428,6 @@ restore-cursor@^3.1.0: onetime "^5.1.0" signal-exit "^3.0.2" -ret@~0.1.10: - version "0.1.15" - resolved "https://registry.yarnpkg.com/ret/-/ret-0.1.15.tgz#b8a4825d5bdb1fc3f6f53c2bc33f81388681c7bc" - integrity sha512-TTlYpa+OL+vMMNG24xSlQGEJ3B/RzEfUlLct7b5G/ytav+wPrplCpVMFuwzXbkecJrb6IYo1iFb0S9v37754mg== - reusify@^1.0.4: version "1.0.4" resolved "https://registry.yarnpkg.com/reusify/-/reusify-1.0.4.tgz#90da382b1e126efc02146e90845a88db12925d76" @@ -5915,13 +5476,6 @@ safe-buffer@~5.2.0: resolved "https://registry.yarnpkg.com/safe-buffer/-/safe-buffer-5.2.1.tgz#1eaf9fa9bdb1fdd4ec75f58f9cdb4e6b7827eec6" integrity sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ== -safe-regex@^1.1.0: - version "1.1.0" - resolved "https://registry.yarnpkg.com/safe-regex/-/safe-regex-1.1.0.tgz#40a3669f3b077d1e943d44629e157dd48023bf2e" - integrity sha1-QKNmnzsHfR6UPURinhV91IAjvy4= - dependencies: - ret "~0.1.10" - "safer-buffer@>= 2.1.2 < 3": version "2.1.2" resolved "https://registry.yarnpkg.com/safer-buffer/-/safer-buffer-2.1.2.tgz#44fa161b0187b9549dd84bb91802f9bd8385cd6a" @@ -5939,10 +5493,10 @@ saxes@^5.0.1: dependencies: xmlchars "^2.2.0" -scheduler@^0.21.0: - version "0.21.0" - resolved 
"https://registry.yarnpkg.com/scheduler/-/scheduler-0.21.0.tgz#6fd2532ff5a6d877b6edb12f00d8ab7e8f308820" - integrity sha512-1r87x5fz9MXqswA2ERLo0EbOAU74DpIUO090gIasYTqlVoJeMcl+Z1Rg7WHz+qtPujhS/hGIt9kxZOYBV3faRQ== +scheduler@^0.22.0: + version "0.22.0" + resolved "https://registry.yarnpkg.com/scheduler/-/scheduler-0.22.0.tgz#83a5d63594edf074add9a7198b1bae76c3db01b8" + integrity sha512-6QAm1BgQI88NPYymgGQLCZgvep4FyePDWFpXVK+zNSUgHwlqpJy8VEh8Et0KxTACS4VWwMousBElAZOH9nkkoQ== dependencies: loose-envify "^1.1.0" @@ -6007,16 +5561,6 @@ set-blocking@^2.0.0: resolved "https://registry.yarnpkg.com/set-blocking/-/set-blocking-2.0.0.tgz#045f9782d011ae9a6803ddd382b24392b3d890f7" integrity sha1-BF+XgtARrppoA93TgrJDkrPYkPc= -set-value@^2.0.0, set-value@^2.0.1: - version "2.0.1" - resolved "https://registry.yarnpkg.com/set-value/-/set-value-2.0.1.tgz#a18d40530e6f07de4228c7defe4227af8cad005b" - integrity sha512-JxHc1weCN68wRY0fhCoXpyK55m/XPHafOmK4UWD7m2CI14GMcFypt4w/0+NV5f/ZMby2F6S2wwA7fgynh9gWSw== - dependencies: - extend-shallow "^2.0.1" - is-extendable "^0.1.1" - is-plain-object "^2.0.3" - split-string "^3.0.1" - setprototypeof@1.2.0: version "1.2.0" resolved "https://registry.yarnpkg.com/setprototypeof/-/setprototypeof-1.2.0.tgz#66c9a24a73f9fc28cbe66b09fed3d33dcaf1b424" @@ -6091,47 +5635,6 @@ slice-ansi@^2.0.0: astral-regex "^1.0.0" is-fullwidth-code-point "^2.0.0" -snapdragon-node@^2.0.1: - version "2.1.1" - resolved "https://registry.yarnpkg.com/snapdragon-node/-/snapdragon-node-2.1.1.tgz#6c175f86ff14bdb0724563e8f3c1b021a286853b" - integrity sha512-O27l4xaMYt/RSQ5TR3vpWCAB5Kb/czIcqUFOM/C4fYcLnbZUc1PkjTAMjof2pBWaSTwOUd6qUHcFGVGj7aIwnw== - dependencies: - define-property "^1.0.0" - isobject "^3.0.0" - snapdragon-util "^3.0.1" - -snapdragon-util@^3.0.1: - version "3.0.1" - resolved "https://registry.yarnpkg.com/snapdragon-util/-/snapdragon-util-3.0.1.tgz#f956479486f2acd79700693f6f7b805e45ab56e2" - integrity sha512-mbKkMdQKsjX4BAL4bRYTj21edOf8cN7XHdYUJEe+Zn99hVEYcMvKPct1IqNe7+AZPirn8BCDOQBHQZknqmKlZQ== - dependencies: - kind-of "^3.2.0" - -snapdragon@^0.8.1: - version "0.8.2" - resolved "https://registry.yarnpkg.com/snapdragon/-/snapdragon-0.8.2.tgz#64922e7c565b0e14204ba1aa7d6964278d25182d" - integrity sha512-FtyOnWN/wCHTVXOMwvSv26d+ko5vWlIDD6zoUJ7LW8vh+ZBC8QdljveRP+crNrtBwioEUWy/4dMtbBjA4ioNlg== - dependencies: - base "^0.11.1" - debug "^2.2.0" - define-property "^0.2.5" - extend-shallow "^2.0.1" - map-cache "^0.2.2" - source-map "^0.5.6" - source-map-resolve "^0.5.0" - use "^3.1.0" - -source-map-resolve@^0.5.0: - version "0.5.3" - resolved "https://registry.yarnpkg.com/source-map-resolve/-/source-map-resolve-0.5.3.tgz#190866bece7553e1f8f267a2ee82c606b5509a1a" - integrity sha512-Htz+RnsXWk5+P2slx5Jh3Q66vhQj1Cllm0zvnaY98+NFx+Dv2CF/f5O/t8x+KaNdrdIAsruNzoh/KpialbqAnw== - dependencies: - atob "^2.1.2" - decode-uri-component "^0.2.0" - resolve-url "^0.2.1" - source-map-url "^0.4.0" - urix "^0.1.0" - source-map-support@^0.5.16, source-map-support@^0.5.6: version "0.5.21" resolved "https://registry.yarnpkg.com/source-map-support/-/source-map-support-0.5.21.tgz#04fe7c7f9e1ed2d662233c28cb2b35b9f63f6e4f" @@ -6140,11 +5643,6 @@ source-map-support@^0.5.16, source-map-support@^0.5.6: buffer-from "^1.0.0" source-map "^0.6.0" -source-map-url@^0.4.0: - version "0.4.1" - resolved "https://registry.yarnpkg.com/source-map-url/-/source-map-url-0.4.1.tgz#0af66605a745a5a2f91cf1bbf8a7afbc283dec56" - integrity sha512-cPiFOTLUKvJFIg4SKVScy4ilPPW6rFgMgfuZJPNoDuMs3nC1HbMUycBoJw77xFIp6z1UJQJOfx6C9GMH80DiTw== - source-map@^0.5.6: 
version "0.5.7" resolved "https://registry.yarnpkg.com/source-map/-/source-map-0.5.7.tgz#8a039d2d1021d22d1ea14c80d8ea468ba2ef3fcc" @@ -6160,13 +5658,6 @@ source-map@^0.7.3: resolved "https://registry.yarnpkg.com/source-map/-/source-map-0.7.3.tgz#5302f8169031735226544092e64981f751750383" integrity sha512-CkCj6giN3S+n9qrYiBTX5gystlENnRW5jZeNLHpe6aue+SrHcG5VYwujhW9s4dY31mEGsxBDrHR6oI69fTXsaQ== -split-string@^3.0.1, split-string@^3.0.2: - version "3.1.0" - resolved "https://registry.yarnpkg.com/split-string/-/split-string-3.1.0.tgz#7cb09dda3a86585705c64b39a6466038682e8fe2" - integrity sha512-NzNVhJDYpwceVVii8/Hu6DKfD2G+NrQHlS/V/qgv763EYudVwEcMQNxd2lh+0VrUByXN/oJkl5grOhYWvQUYiw== - dependencies: - extend-shallow "^3.0.0" - sprintf-js@~1.0.2: version "1.0.3" resolved "https://registry.yarnpkg.com/sprintf-js/-/sprintf-js-1.0.3.tgz#04e6926f662895354f3dd015203633b857297e2c" @@ -6191,14 +5682,6 @@ stacktrace-parser@^0.1.3: dependencies: type-fest "^0.7.1" -static-extend@^0.1.1: - version "0.1.2" - resolved "https://registry.yarnpkg.com/static-extend/-/static-extend-0.1.2.tgz#60809c39cbff55337226fd5e0b520f341f1fb5c6" - integrity sha1-YICcOcv/VTNyJv1eC1IPNB8ftcY= - dependencies: - define-property "^0.2.5" - object-copy "^0.1.0" - statuses@2.0.1: version "2.0.1" resolved "https://registry.yarnpkg.com/statuses/-/statuses-2.0.1.tgz#55cb000ccf1d48728bd23c685a063998cf1a1b63" @@ -6383,21 +5866,6 @@ to-fast-properties@^2.0.0: resolved "https://registry.yarnpkg.com/to-fast-properties/-/to-fast-properties-2.0.0.tgz#dc5e698cbd079265bc73e0377681a4e4e83f616e" integrity sha1-3F5pjL0HkmW8c+A3doGk5Og/YW4= -to-object-path@^0.3.0: - version "0.3.0" - resolved "https://registry.yarnpkg.com/to-object-path/-/to-object-path-0.3.0.tgz#297588b7b0e7e0ac08e04e672f85c1f4999e17af" - integrity sha1-KXWIt7Dn4KwI4E5nL4XB9JmeF68= - dependencies: - kind-of "^3.0.2" - -to-regex-range@^2.1.0: - version "2.1.1" - resolved "https://registry.yarnpkg.com/to-regex-range/-/to-regex-range-2.1.1.tgz#7c80c17b9dfebe599e27367e0d4dd5590141db38" - integrity sha1-fIDBe53+vlmeJzZ+DU3VWQFB2zg= - dependencies: - is-number "^3.0.0" - repeat-string "^1.6.1" - to-regex-range@^5.0.1: version "5.0.1" resolved "https://registry.yarnpkg.com/to-regex-range/-/to-regex-range-5.0.1.tgz#1648c44aae7c8d988a326018ed72f5b4dd0392e4" @@ -6405,16 +5873,6 @@ to-regex-range@^5.0.1: dependencies: is-number "^7.0.0" -to-regex@^3.0.1, to-regex@^3.0.2: - version "3.0.2" - resolved "https://registry.yarnpkg.com/to-regex/-/to-regex-3.0.2.tgz#13cfdd9b336552f30b51f33a8ae1b42a7a7599ce" - integrity sha512-FWtleNAtZ/Ki2qtqej2CXTOayOH9bHDQF+Q48VpWyDXjbYxA4Yz8iDB31zXOBUlOHHKidDbqGVrTUvQMPmBGBw== - dependencies: - define-property "^2.0.2" - extend-shallow "^3.0.2" - regex-not "^1.0.2" - safe-regex "^1.1.0" - toidentifier@1.0.1: version "1.0.1" resolved "https://registry.yarnpkg.com/toidentifier/-/toidentifier-1.0.1.tgz#3be34321a88a820ed1bd80dfaa33e479fbb8dd35" @@ -6512,16 +5970,6 @@ unicode-property-aliases-ecmascript@^2.0.0: resolved "https://registry.yarnpkg.com/unicode-property-aliases-ecmascript/-/unicode-property-aliases-ecmascript-2.0.0.tgz#0a36cb9a585c4f6abd51ad1deddb285c165297c8" integrity sha512-5Zfuy9q/DFr4tfO7ZPeVXb1aPoeQSdeFMLpYuFebehDAhbuevLs5yxSZmIFN1tP5F9Wl4IpJrYojg85/zgyZHQ== -union-value@^1.0.0: - version "1.0.1" - resolved "https://registry.yarnpkg.com/union-value/-/union-value-1.0.1.tgz#0b6fe7b835aecda61c6ea4d4f02c14221e109847" - integrity sha512-tJfXmxMeWYnczCVs7XAEvIV7ieppALdyepWMkHkwciRpZraG/xwT+s2JN8+pr1+8jCRf80FFzvr+MpQeeoF4Xg== - dependencies: - arr-union 
"^3.1.0" - get-value "^2.0.6" - is-extendable "^0.1.1" - set-value "^2.0.1" - universalify@^0.1.0: version "0.1.2" resolved "https://registry.yarnpkg.com/universalify/-/universalify-0.1.2.tgz#b646f69be3942dabcecc9d6639c80dc105efaa66" @@ -6542,19 +5990,6 @@ unpipe@~1.0.0: resolved "https://registry.yarnpkg.com/unpipe/-/unpipe-1.0.0.tgz#b2bf4ee8514aae6165b4817829d21b2ef49904ec" integrity sha1-sr9O6FFKrmFltIF4KdIbLvSZBOw= -unset-value@^1.0.0: - version "1.0.0" - resolved "https://registry.yarnpkg.com/unset-value/-/unset-value-1.0.0.tgz#8376873f7d2335179ffb1e6fc3a8ed0dfc8ab559" - integrity sha1-g3aHP30jNRef+x5vw6jtDfyKtVk= - dependencies: - has-value "^0.3.1" - isobject "^3.0.0" - -urix@^0.1.0: - version "0.1.0" - resolved "https://registry.yarnpkg.com/urix/-/urix-0.1.0.tgz#da937f7a62e21fec1fd18d49b35c2935067a6c72" - integrity sha1-2pN/emLiH+wf0Y1Js1wpNQZ6bHI= - url-parse@^1.5.3: version "1.5.10" resolved "https://registry.yarnpkg.com/url-parse/-/url-parse-1.5.10.tgz#9d3c2f736c1d75dd3bd2be507dcc111f1e2ea9c1" @@ -6568,11 +6003,6 @@ use-sync-external-store@^1.0.0: resolved "https://registry.yarnpkg.com/use-sync-external-store/-/use-sync-external-store-1.2.0.tgz#7dbefd6ef3fe4e767a0cf5d7287aacfb5846928a" integrity sha512-eEgnFxGQ1Ife9bzYs6VLi8/4X6CObHMw9Qr9tPY43iKwsPw8xE8+EFsf/2cFZ5S3esXgpWgtSCtLNS41F+sKPA== -use@^3.1.0: - version "3.1.1" - resolved "https://registry.yarnpkg.com/use/-/use-3.1.1.tgz#d50c8cac79a19fbc20f2911f56eb973f4e10070f" - integrity sha512-cwESVXlO3url9YWlFW/TA9cshCEhtu7IKJ/p5soJ/gGpj7vbvFrAY/eIioQ6Dw23KjZhYgiIo8HOs1nQ2vr/oQ== - util-deprecate@^1.0.1, util-deprecate@~1.0.1: version "1.0.2" resolved "https://registry.yarnpkg.com/util-deprecate/-/util-deprecate-1.0.2.tgz#450d4dc9fa70de732762fbd2d4a28981419a0ccf" @@ -6793,11 +6223,6 @@ xmlbuilder@^15.1.1: resolved "https://registry.yarnpkg.com/xmlbuilder/-/xmlbuilder-15.1.1.tgz#9dcdce49eea66d8d10b42cae94a79c3c8d0c2ec5" integrity sha512-yMqGBqtXyeN1e3TGYvgNgDVZ3j84W4cwkOXQswghol6APgZWaff9lnbvN7MHYJOiXsvGPXtjTYJEiC9J2wv9Eg== -xmlbuilder@^9.0.7: - version "9.0.7" - resolved "https://registry.yarnpkg.com/xmlbuilder/-/xmlbuilder-9.0.7.tgz#132ee63d2ec5565c557e20f4c22df9aca686b10d" - integrity sha512-7YXTQc3P2l9+0rjaUbLwMKRhtmwg1M1eDf6nag7urC7pIPYLD9W/jmzQ4ptRSUbodw5S0jfoGTflLemQibSpeQ== - xmlbuilder@~11.0.0: version "11.0.1" resolved "https://registry.yarnpkg.com/xmlbuilder/-/xmlbuilder-11.0.1.tgz#be9bae1c8a046e76b31127726347d0ad7002beb3" diff --git a/js/web/docs/webnn-operators.md b/js/web/docs/webnn-operators.md index 4bcfae183ffcb..a6a2ecdf6f467 100644 --- a/js/web/docs/webnn-operators.md +++ b/js/web/docs/webnn-operators.md @@ -6,107 +6,110 @@ operators and the supported opset domain/versions in **WebNN EP** by ONNX Runtim (**Note**: ONNX Runtime only *guarantees* support for models stamped with opset version 7 or above for opset domain 'ai.onnx'.) -[WebNN API](https://webmachinelearning.github.io/webnn) provides two device types `cpu` and `gpu` to leverage different on-device accelerators. WebNN API implementation in Chromium uses TFLite XNNPack delegate backend for `cpu` device type and DirectML backend for `gpu` device type. [The op support status](https://webmachinelearning.github.io/webnn-status/) behind these two backends is inconsistent. +The [WebNN API](https://webmachinelearning.github.io/webnn) is available in the latest versions of Chrome and Edge on Windows, +Linux, macOS, Android, and ChromeOS behind an "Enables WebNN API" flag. The operator support status may vary across these +platforms. 
Check the [WebNN status](https://webmachinelearning.github.io/webnn-status/) for the latest implementation details. -| Operator | Opset | WebNN API | WebNN CPU | WebNN GPU | Comments | -|:------:|:------:|:------:|:-:|:-:|:------| -| Abs | ai.onnx(7-12, 13+) | abs | ✓ | ✓ | | -| Add | ai.onnx(7-12, 13, 14+) | add | ✓ | ✓ | | -| And | ai.onnx(7+) | logicalAnd | ✗ | ✓ | | -| ArgMax | ai.onnx(7-10, 11, 12, 13+) | argMax | ✓ | ✓ | | -| ArgMin | ai.onnx(7-10, 11, 12, 13+) | argMin | ✓ | ✓ | | -| AveragePool | ai.onnx(7-9, 10, 11, 12-18, 19+) | averagePool2d | ✓ | ✓ | Only supports 4-D input, 2-D 'kernel_shape', 'count_include_pad' value is 0 | -| BatchNormalization | ai.onnx(7-8, 9-13, 14, 15+) | batchNormalization | ✓ | ✓ | Only supports 'training_mode' value is 0, one output | -| Cast | ai.onnx(7-8, 9-12, 13-18, 19-20, 21+) | cast | ✓ | ✓ | WebNN CPU backend doesn't support casting to uint64 data type | -| Ceil | ai.onnx(7-12, 13+) | ceil | ✓ | ✓ | | -| Clip | ai.onnx(7-10, 11, 12, 13+) | clamp | ✓ | ✓ | WebNN CPU backend only supports 3 specific ranges: [0.0, infinity], [-1.0, 1.0], [0.0, 6.0] (Chromium issue: https://issues.chromium.org/issues/326156496) | -| Concat | ai.onnx(7-10, 11-12, 13+) | concat | ✓ | ✓ | | -| Conv | ai.onnx(7-10, 11+) | conv2d | ✓ | ✓ | Only supports 3-D or 4-D input and 'W' (weight) | -| ConvTranspose | ai.onnx(7-10, 11+) | convTranspose2d | ✓ | ✓ | Only supports 3-D or 4-D input and 'W' (weight). WebNN CPU backend only supports default dilations and group | -| Cos | ai.onnx(7+) | cos | ✓ | ✓ | | -| CumSum | ai.onnx(11-13, 14+) | cumulativeSum | ✓ | ✓ | 'axis' input should be a constant | -| Div | ai.onnx(7-12, 13, 14+) | div | ✓ | ✓ | | -| DequantizeLinear | ai.onnx(10-12, 13-18, 19-20, 21-22, 23+) | dequantizeLinear | ✓ | ✓ | The shape of x_scale should be a subsample of the shape of input | -| Dropout | ai.onnx(7-9, 10-11, 12, 13-21, 22+) | identity | ✓ | ✓ | Only supports test mode | -| Einsum | ai.onnx(12+) | reshape, transpose, matmul, reduceSum, mul, triangular | ✓ | ✓ | | -| Elu | ai.onnx(7+) | elu | ✓ | ✓ | WebNN CPU backend only supports 'alpha' value is 1.0 | -| Equal | ai.onnx(7-10, 11-12, 13-18, 19+) | equal | ✓ | ✓ | | -| Erf | ai.onnx(7-9, 10-12, 13+) | erf | ✓ | ✓ | | -| Exp | ai.onnx(7-12, 13+) | exp | ✓ | ✓ | | -| Expand | ai.onnx(8-12, 13+) | expand | ✓ | ✓ | 'shape' input should be a constant | -| Flatten | ai.onnx(7-8, 9-10, 11-12, 13-20, 21+) | reshape | ✓ | ✓ | | -| Floor | ai.onnx(7-12, 13+) | floor | ✓ | ✓ | | -| Gather | ai.onnx(7-10, 11-12, 13+) | gather | ✓ | ✓ | | -| GatherElements | ai.onnx(11-12, 13+) | gatherElements | ✗ | ✓ | | -| GatherND | ai.onnx(11, 12, 13+) | gatherND | ✓ | ✓ | Only supports 'batch_dims' == 0 | -| Gelu | ai.onnx(20+) | gelu | ✓ | ✓ | | -| Gemm | ai.onnx(7-8, 9-10, 11-12, 13+) | gemm | ✓ | ✓ | Only supports 1-D 'C' input | -| GlobalAveragePool | ai.onnx(7+) | averagePool2d | ✓ | ✓ | Only supports 4-D input | -| GlobalMaxPool | ai.onnx(7+) | maxPool2d | ✓ | ✓ | Only supports 4-D input | -| GlobalLpPool| ai.onnx(7+) | l2Pool2d | ✗ | ✓ | Only supports 4-D input, 'p' value is 2 | -| Greater | ai.onnx(7-8, 9-12, 13+) | greater | ✓ | ✓ | | -| GreaterOrEqual | ai.onnx(12-15, 16+) | greaterOrEqual | ✓ | ✓ | | -| GRU | ai.onnx(7-13, 14-21, 22+) | gru | ✓ | ✓ | Only supports 'layout' == 0. 'clip' is not supported. The activation functions in 'activations' must be one of 'Relu', 'Tanh', 'Sigmoid'. Forward and backward activations must be the same if bidirectional. 
'sequence_lens' if present should be constant with values equal to the first dimension length of input 'X' | -| HardSigmoid | ai.onnx(7+) | hardSigmoid | ✓ | ✓ | | -| HardSwish | ai.onnx(14+) | hardSwish | ✓ | ✓ | | -| Identity | ai.onnx(7-13, 14-15, 16-18, 19-20, 21+) | identity | ✓ | ✓ | | -| InstanceNormalization | ai.onnx(7+) | instanceNormalization | ✓ | ✓ | | -| LayerNormalization | ai.onnx(7-16, 17+) | layerNormalization | ✓ | ✓ | | -| LeakyRelu | ai.onnx(7-15, 16+) | leakyRelu | ✓ | ✓ | | -| Less | ai.onnx(7-8, 9-12, 13+) | lesser | ✓ | ✓ | | -| LessOrEqual | ai.onnx(12-15, 16+) | lesserOrEqual | ✓ | ✓ | | -| Log | ai.onnx(7-12, 13+) | log | ✓ | ✓ | | -| LpPool | ai.onnx(7-10, 11-17, 18+) | l2Pool2d | ✗ | ✓ | Only supports 4-D input, 2-D 'kernel_shape', 'p' value is 2 | -| LRN | ai.onnx(7-12, 13+) | pad, averagePool2d, transpose, add, mul, pow, div | ✓ | ✓ | | -| LSTM | ai.onnx(7-13, 14-21, 22+) | lstm | ✓ | ✓ | Only supports 'layout' == 0, 'input_forget' == 0. 'clip' is not supported. The activation functions in 'activations' must be one of 'Relu', 'Tanh', 'Sigmoid'. Forward and backward activations must be the same if bidirectional. 'sequence_lens' if present should be constant with values equal to the first dimension length of input 'X' | -| MatMul | ai.onnx(7-8, 9-12, 13+) | matmul | ✓ | ✓ | | -| Max | ai.onnx(7, 8-11, 12, 13+) | max | ✓ | ✓ | | -| MaxPool | ai.onnx(7, 8-9, 10, 11, 12+) | maxPool2d | ✓ | ✓ | Only supports 4-D input, 2-D 'kernel_shape', 'storage_order' != 1, one output | -| Min | ai.onnx(7, 8-11, 12, 13+) | min | ✓ | ✓ | | -| Mul | ai.onnx(7-12, 13, 14+) | mul | ✓ | ✓ | | -| Neg | ai.onnx(7-12, 13+) | neg | ✓ | ✓ | | -| Not | ai.onnx(7+) | logicalNot | ✓ | ✓ | | -| Or | ai.onnx(7+) | logicalOr | ✗ | ✓ | | -| Pad | ai.onnx(7-10, 11-12, 13-17, 18, 19-20, 21+) | pad | ✓ | ✓ | modes == 'wrap' is not supported | -| Pow | ai.onnx(7-11, 12, 13-14, 15+) | pow | ✓ | ✓ | | -| PRelu | ai.onnx(7-8, 9-15, 16+) | prelu | ✓ | ✓ | WebNN CPU backend restricts the last dimension of input and slope to be same (Chromium issue: https://issues.chromium.org/issues/335517470) | -| QuantizeLinear | ai.onnx(10-12, 13-18, 19-20, 21-22, 23+) | quantizeLinear | ✓ | ✓ | The shape of x_scale should be a subsample of the shape of input | -| Reciprocal | ai.onnx(7-12, 13+) | reciprocal | ✓ | ✓ | | -| ReduceL1 | ai.onnx(7-10, 11-12, 13-17, 18+) | reduceL1 | ✓ | ✓ | Input 'axes' if present should be a constant | -| ReduceL2 | ai.onnx(7-10, 11-12, 13-17, 18+) | reduceL2 | ✓ | ✓ | Input 'axes' if present should be a constant | -| ReduceLogSum| ai.onnx(7-10, 11-12, 13-17, 18+) | reduceLogSum| ✓ | ✓ | Input 'axes' if present should be a constant | -| ReduceLogSumExp | ai.onnx(7-10, 11-12, 13-17, 18+) | reduceLogSumExp | ✓ | ✓ | Input 'axes' if present should be a constant | -| ReduceMax | ai.onnx(7-10, 11, 12, 13-17, 18-19, 20+) | reduceMax | ✓ | ✓ | Input 'axes' if present should be a constant | -| ReduceMean | ai.onnx(7-10, 11-12, 13-17, 18+) | reduceMean | ✓ | ✓ | Input 'axes' if present should be a constant | -| ReduceMin | ai.onnx(7-10, 11, 12, 13-17, 18-19, 20+) | reduceMin | ✓ | ✓ | Input 'axes' if present should be a constant | -| ReduceProd | ai.onnx(7-10, 11-12, 13-17, 18+) | reduceProduct | ✓ | ✓ | Input 'axes' if present should be a constant | -| ReduceSum | ai.onnx(7-10, 11-12, 13+) | reduceSum | ✓ | ✓ | Input 'axes' if present should be a constant | -| ReduceSumSquare | ai.onnx(7-10, 11-12, 13-17, 18+) | reduceSumSquare | ✓ | ✓ | Input 'axes' if present should be a constant | -| Relu | 
ai.onnx(7-12, 13, 14+) | relu | ✓ | ✓ | | -| Reshape | ai.onnx(7-12, 13, 14-18, 19-20, 21+) | reshape | ✓ | ✓ | Input 'shape' should be a constant, 0 dimension value in 'shape' is not supported | -| Resize | ai.onnx(11-12, 13-17, 18, 19+) | resample2d | ✓ | ✓ | Only supports 4-D input, antialias == 0, exclude_outside == 0, keep_aspect_ratio_policy == 'stretch', 'linear' and 'nearest' modes, input 'scales' and 'sizes' if present must be a constant | -| ScatterElements | ai.onnx(11-12, 13-15, 16-17, 18+) | scatterElements | ✗ | ✓ | Only supports 'reduction' == 'none' | -| ScatterND | ai.onnx(11-12, 13-15, 16-17, 18+) | scatterND | ✗ | ✓ | Only supports 'reduction' == 'none' | -| Shape | ai.onnx(7-12, 13-14, 15-18, 19-20, 21+) | slice | ✓ | ✓ | | -| SimplifiedLayerNormalization | ai.onnx(1+) | pow, reduceMean, add, sqrt, div, mul | ✓ | ✓ | | -| Sigmoid | ai.onnx(7-12, 13+) | sigmoid | ✓ | ✓ | | -| Sign | ai.onnx(9-12, 13+) | sign | ✓ | ✓ | | -| SkipSimplifiedLayerNormalization | com.microsoft(1+) | pow, reduceMean, add, sqrt, div, mul | ✓ | ✓ | | -| Softplus | ai.onnx(7+) | softplus | ✓ | ✓ | | -| Softsign | ai.onnx(7+) | softsign | ✓ | ✓ | | -| Sin | ai.onnx(7+) | sin | ✓ | ✓ | | -| Slice | ai.onnx(7-9, 10, 11-12, 13+) | slice, reverse | ✓ | ✓ | Input 'starts', 'ends', 'axes', and 'steps' if present must be a constant | -| Softmax | ai.onnx(7-10, 11-12, 13+) | softmax | ✓ | ✓ | | -| Split | ai.onnx(7-10, 11-12, 13-17, 18+) | split | ✓ | ✓ | Input 'split' if present should be a constant | -| Sqrt | ai.onnx(7-12, 13+) | sqrt | ✓ | ✓ | | -| Squeeze | ai.onnx(7-10, 11-12, 13-20, 21+) | reshape | ✓ | ✓ | Input 'axes' if present should be a constant | -| Sub | ai.onnx(7-12, 13, 14+) | sub | ✓ | ✓ | | -| Tan | ai.onnx(7+) | tan | ✓ | ✓ | | -| Tanh | ai.onnx(7-12, 13+) | tanh | ✓ | ✓ | | -| Tile | ai.onnx(7-12, 13+) | tile | ✗ | ✓ | Input 'repeats' should be a constant | -| Transpose | ai.onnx(7-12, 13-20, 21+) | transpose | ✓ | ✓ | | -| Trilu | ai.onnx(14+) | triangular | ✓ | ✓ | Input 'k' (option 'diagonal' for WebNN) if present should be a constant | -| Unsqueeze | ai.onnx(7-10, 11-12, 13-20, 21+) | reshape | ✓ | ✓ | | -| Where | ai.onnx(7-8, 9-15, 16+) | where | ✓ | ✓ | | -| Xor | ai.onnx(7+) | logicalXor | ✗ | ✓ | | +| Operator | Opset | WebNN API | Comments | +|:------:|:------:|:------:|:------| +| Abs | ai.onnx(7-12, 13+) | abs | | +| Add | ai.onnx(7-12, 13, 14+) | add | | +| And | ai.onnx(7+) | logicalAnd | | +| ArgMax | ai.onnx(7-10, 11, 12, 13+) | argMax | | +| ArgMin | ai.onnx(7-10, 11, 12, 13+) | argMin | | +| AveragePool | ai.onnx(7-9, 10, 11, 12-18, 19+) | averagePool2d | Only supports 4-D input, 2-D 'kernel_shape', 'count_include_pad' value is 0 | +| BatchNormalization | ai.onnx(7-8, 9-13, 14, 15+) | batchNormalization | Only supports 'training_mode' value is 0, one output | +| Cast | ai.onnx(7-8, 9-12, 13-18, 19-20, 21+) | cast | | +| Ceil | ai.onnx(7-12, 13+) | ceil | | +| Clip | ai.onnx(7-10, 11, 12, 13+) | clamp | | +| Concat | ai.onnx(7-10, 11-12, 13+) | concat | | +| Conv | ai.onnx(7-10, 11+) | conv2d | Only supports 3-D or 4-D input and 'W' (weight) | +| ConvTranspose | ai.onnx(7-10, 11+) | convTranspose2d | Only supports 3-D or 4-D input and 'W' (weight) | +| Cos | ai.onnx(7+) | cos | | +| CumSum | ai.onnx(11-13, 14+) | cumulativeSum | 'axis' input should be a constant | +| Div | ai.onnx(7-12, 13, 14+) | div | | +| DequantizeLinear | ai.onnx(10-12, 13-18, 19-20, 21-22, 23+) | dequantizeLinear | The shape of x_scale should be a subsample of the shape of input | +| Dropout | 
ai.onnx(7-9, 10-11, 12, 13-21, 22+) | identity | Only supports test mode | +| Einsum | ai.onnx(12+) | reshape, transpose, matmul, reduceSum, mul, triangular | | +| Elu | ai.onnx(7+) | elu | | +| Equal | ai.onnx(7-10, 11-12, 13-18, 19+) | equal | | +| Erf | ai.onnx(7-9, 10-12, 13+) | erf | | +| Exp | ai.onnx(7-12, 13+) | exp | | +| Expand | ai.onnx(8-12, 13+) | expand | 'shape' input should be a constant | +| Flatten | ai.onnx(7-8, 9-10, 11-12, 13-20, 21+) | reshape | | +| Floor | ai.onnx(7-12, 13+) | floor | | +| Gather | ai.onnx(7-10, 11-12, 13+) | gather | | +| GatherElements | ai.onnx(11-12, 13+) | gatherElements | | +| GatherND | ai.onnx(11, 12, 13+) | gatherND | Only supports 'batch_dims' == 0 | +| Gelu | ai.onnx(20+) | gelu | | +| Gemm | ai.onnx(7-8, 9-10, 11-12, 13+) | gemm | Only supports 1-D 'C' input | +| GlobalAveragePool | ai.onnx(7+) | averagePool2d | Only supports 4-D input | +| GlobalMaxPool | ai.onnx(7+) | maxPool2d | Only supports 4-D input | +| GlobalLpPool| ai.onnx(7+) | l2Pool2d | Only supports 4-D input, 'p' value is 2 | +| Greater | ai.onnx(7-8, 9-12, 13+) | greater | | +| GreaterOrEqual | ai.onnx(12-15, 16+) | greaterOrEqual | | +| GRU | ai.onnx(7-13, 14-21, 22+) | gru | Only supports 'layout' == 0. 'clip' is not supported. The activation functions in 'activations' must be one of 'Relu', 'Tanh', 'Sigmoid'. Forward and backward activations must be the same if bidirectional. 'sequence_lens' if present should be constant with values equal to the first dimension length of input 'X' | +| HardSigmoid | ai.onnx(7+) | hardSigmoid | | +| HardSwish | ai.onnx(14+) | hardSwish | | +| Identity | ai.onnx(7-13, 14-15, 16-18, 19-20, 21+) | identity | | +| InstanceNormalization | ai.onnx(7+) | instanceNormalization | | +| LayerNormalization | ai.onnx(7-16, 17+) | layerNormalization | | +| LeakyRelu | ai.onnx(7-15, 16+) | leakyRelu | | +| Less | ai.onnx(7-8, 9-12, 13+) | lesser | | +| LessOrEqual | ai.onnx(12-15, 16+) | lesserOrEqual | | +| Log | ai.onnx(7-12, 13+) | log | | +| LpPool | ai.onnx(7-10, 11-17, 18+) | l2Pool2d | Only supports 4-D input, 2-D 'kernel_shape', 'p' value is 2 | +| LRN | ai.onnx(7-12, 13+) | pad, averagePool2d, transpose, add, mul, pow, div | | +| LSTM | ai.onnx(7-13, 14-21, 22+) | lstm | Only supports 'layout' == 0, 'input_forget' == 0. 'clip' is not supported. The activation functions in 'activations' must be one of 'Relu', 'Tanh', 'Sigmoid'. Forward and backward activations must be the same if bidirectional. 
'sequence_lens' if present should be constant with values equal to the first dimension length of input 'X' | +| MatMul | ai.onnx(7-8, 9-12, 13+) | matmul | | +| Max | ai.onnx(7, 8-11, 12, 13+) | max | | +| MaxPool | ai.onnx(7, 8-9, 10, 11, 12+) | maxPool2d | Only supports 4-D input, 2-D 'kernel_shape', 'storage_order' != 1, one output | +| Min | ai.onnx(7, 8-11, 12, 13+) | min | | +| Mul | ai.onnx(7-12, 13, 14+) | mul | | +| Neg | ai.onnx(7-12, 13+) | neg | | +| Not | ai.onnx(7+) | logicalNot | | +| Or | ai.onnx(7+) | logicalOr | | +| Pad | ai.onnx(7-10, 11-12, 13-17, 18, 19-20, 21+) | pad | modes == 'wrap' is not supported | +| Pow | ai.onnx(7-11, 12, 13-14, 15+) | pow | | +| PRelu | ai.onnx(7-8, 9-15, 16+) | prelu | | +| QuantizeLinear | ai.onnx(10-12, 13-18, 19-20, 21-22, 23+) | quantizeLinear | The shape of x_scale should be a subsample of the shape of input | +| Reciprocal | ai.onnx(7-12, 13+) | reciprocal | | +| ReduceL1 | ai.onnx(7-10, 11-12, 13-17, 18+) | reduceL1 | Input 'axes' if present should be a constant | +| ReduceL2 | ai.onnx(7-10, 11-12, 13-17, 18+) | reduceL2 | Input 'axes' if present should be a constant | +| ReduceLogSum| ai.onnx(7-10, 11-12, 13-17, 18+) | reduceLogSum | Input 'axes' if present should be a constant | +| ReduceLogSumExp | ai.onnx(7-10, 11-12, 13-17, 18+) | reduceLogSumExp | Input 'axes' if present should be a constant | +| ReduceMax | ai.onnx(7-10, 11, 12, 13-17, 18-19, 20+) | reduceMax | Input 'axes' if present should be a constant | +| ReduceMean | ai.onnx(7-10, 11-12, 13-17, 18+) | reduceMean | Input 'axes' if present should be a constant | +| ReduceMin | ai.onnx(7-10, 11, 12, 13-17, 18-19, 20+) | reduceMin | Input 'axes' if present should be a constant | +| ReduceProd | ai.onnx(7-10, 11-12, 13-17, 18+) | reduceProduct | Input 'axes' if present should be a constant | +| ReduceSum | ai.onnx(7-10, 11-12, 13+) | reduceSum | Input 'axes' if present should be a constant | +| ReduceSumSquare | ai.onnx(7-10, 11-12, 13-17, 18+) | reduceSumSquare | Input 'axes' if present should be a constant | +| Relu | ai.onnx(7-12, 13, 14+) | relu | | +| Reshape | ai.onnx(7-12, 13, 14-18, 19-20, 21+) | reshape | Input 'shape' should be a constant, 0 dimension value in 'shape' is not supported | +| Resize | ai.onnx(11-12, 13-17, 18, 19+) | resample2d | Only supports 4-D input, antialias == 0, exclude_outside == 0, keep_aspect_ratio_policy == 'stretch', 'linear' and 'nearest' modes, input 'scales' and 'sizes' if present must be a constant | +| RotaryEmbedding | com.microsoft(1+) | add, concat, gather, mul, reshape, split | | +| ScatterElements | ai.onnx(11-12, 13-15, 16-17, 18+) | scatterElements | Only supports 'reduction' == 'none' | +| ScatterND | ai.onnx(11-12, 13-15, 16-17, 18+) | scatterND | Only supports 'reduction' == 'none' | +| Shape | ai.onnx(7-12, 13-14, 15-18, 19-20, 21+) | slice | | +| SimplifiedLayerNormalization | ai.onnx(1+) | pow, reduceMean, add, sqrt, div, mul | | +| Sigmoid | ai.onnx(7-12, 13+) | sigmoid | | +| Sign | ai.onnx(9-12, 13+) | sign | | +| SkipSimplifiedLayerNormalization | com.microsoft(1+) | pow, reduceMean, add, sqrt, div, mul | | +| Softplus | ai.onnx(7+) | softplus | | +| Softsign | ai.onnx(7+) | softsign | | +| Sin | ai.onnx(7+) | sin | | +| Slice | ai.onnx(7-9, 10, 11-12, 13+) | slice, reverse | Input 'starts', 'ends', 'axes', and 'steps' if present must be a constant | +| Softmax | ai.onnx(7-10, 11-12, 13+) | softmax | | +| Split | ai.onnx(7-10, 11-12, 13-17, 18+) | split | Input 'split' if present should be a constant | +| Sqrt | 
ai.onnx(7-12, 13+) | sqrt | | +| Squeeze | ai.onnx(7-10, 11-12, 13-20, 21+) | reshape | Input 'axes' if present should be a constant | +| Sub | ai.onnx(7-12, 13, 14+) | sub | | +| Tan | ai.onnx(7+) | tan | | +| Tanh | ai.onnx(7-12, 13+) | tanh | | +| Tile | ai.onnx(7-12, 13+) | tile | Input 'repeats' should be a constant | +| Transpose | ai.onnx(7-12, 13-20, 21+) | transpose | | +| Trilu | ai.onnx(14+) | triangular | Input 'k' (option 'diagonal' for WebNN) if present should be a constant | +| Unsqueeze | ai.onnx(7-10, 11-12, 13-20, 21+) | reshape | | +| Where | ai.onnx(7-8, 9-15, 16+) | where | | +| Xor | ai.onnx(7+) | logicalXor | | diff --git a/js/web/lib/wasm/jsep/webgpu/ops/3rd-party/conv_backprop_webgpu.ts b/js/web/lib/wasm/jsep/webgpu/ops/3rd-party/conv_backprop_webgpu.ts index 0aa3ad6c4c267..097e2552569c8 100644 --- a/js/web/lib/wasm/jsep/webgpu/ops/3rd-party/conv_backprop_webgpu.ts +++ b/js/web/lib/wasm/jsep/webgpu/ops/3rd-party/conv_backprop_webgpu.ts @@ -46,6 +46,11 @@ export const createConvTranspose2DProgramInfo = ( const inputChannelsPerGroup = wShape[2] / group; const outputChannelsPerGroup = wShape[3]; const aComponents = isChannelsLast ? getMaxComponents(inputChannelsPerGroup) : 1; + const packInputAs4 = isChannelsLast && outputChannelsPerGroup === 1; + const inputChannelsPerGroupInt = packInputAs4 + ? Math.floor(inputChannelsPerGroup / 4) * 4 + : Math.floor(inputChannelsPerGroup / aComponents) * aComponents; + const inputChannelsRemainder = inputChannelsPerGroup - inputChannelsPerGroupInt; const components = isChannelsLast ? getMaxComponents(outputChannelsPerGroup) : 1; const bComponents = isChannelsLast ? (outputChannelsPerGroup === 1 ? aComponents : components) : 1; const outputSize = ShapeUtil.size(outputShape) / components; @@ -78,7 +83,7 @@ export const createConvTranspose2DProgramInfo = ( { type: DataType.uint32, data: dilations }, { type: DataType.uint32, data: effectiveFilterDims }, { type: DataType.int32, data: pads }, - { type: DataType.uint32, data: inputChannelsPerGroup }, + { type: DataType.uint32, data: inputChannelsPerGroupInt }, { type: DataType.uint32, data: outputChannelsPerGroup }, ...createTensorShapeVariables(inputs[0].dims, inputs[1].dims), ]; @@ -114,16 +119,40 @@ export const createConvTranspose2DProgramInfo = ( const calculateResult = (): string => { let calcStr = ''; - if (aComponents === 1) { - calcStr += ` - let w_offset = ${w.indicesToOffset(`${w.type.indices}(u32(wRPerm), u32(wCPerm), inputChannel, wOutChannel)`)}; - let wValue = ${w.getByOffset(`w_offset / ${bComponents}`)}; - dotProd = dotProd + xValue * wValue;`; + if (packInputAs4) { + if (aComponents === 4) { + calcStr += ` + let xValue = ${dy.getByOffset('x_offset')}; + let wValue = ${w.getByOffset('w_offset')}; + dotProd = dotProd + dot(xValue, wValue); + x_offset += 1u; + w_offset += 1u;`; + } else if (aComponents === 2) { + calcStr += ` + dotProd = dotProd + dot(vec4<${dataType}>(${dy.getByOffset('x_offset')}, ${dy.getByOffset('x_offset + 1u')}), vec4<${dataType}>(${w.getByOffset('w_offset')}, ${w.getByOffset('w_offset + 1u')})); + x_offset += 2u; + w_offset += 2u;`; + } else if (aComponents === 1) { + calcStr += ` + dotProd = dotProd + dot(vec4<${dataType}>(${dy.getByOffset('x_offset')}, ${dy.getByOffset('x_offset + 1u')}, ${dy.getByOffset('x_offset + 2u')}, ${dy.getByOffset('x_offset + 3u')}), vec4<${dataType}>(${w.getByOffset('w_offset')}, ${w.getByOffset('w_offset + 1u')}, ${w.getByOffset('w_offset + 2u')}, ${w.getByOffset('w_offset + 3u')})); + x_offset += 4u; + w_offset += 4u;`; 
+ } } else { - if (outputChannelsPerGroup === 1) { + calcStr += ` + let xValue = ${ + isChannelsLast + ? dy.getByOffset( + `${dy.indicesToOffset(`${dy.type.indices}(batch, idyR, idyC, inputChannel)`)} / ${aComponents}`, + ) + : dy.get('batch', 'inputChannel', 'idyR', 'idyC') + }; + `; + if (aComponents === 1) { calcStr += ` - let wValue = ${w.getByOffset(`${w.indicesToOffset(`${w.type.indices}(u32(wRPerm), u32(wCPerm), inputChannel, wOutChannel)`)} / ${bComponents}`)}; - dotProd = dotProd + dot(xValue, wValue);`; + let w_offset = ${w.indicesToOffset(`${w.type.indices}(u32(wRPerm), u32(wCPerm), inputChannel, wOutChannel)`)}; + let wValue = ${w.getByOffset(`w_offset / ${bComponents}`)}; + dotProd = dotProd + xValue * wValue;`; } else { for (let c = 0; c < aComponents; c++) { calcStr += ` @@ -134,6 +163,32 @@ export const createConvTranspose2DProgramInfo = ( } return calcStr; }; + const calculateRemainder = (): string => { + if (inputChannelsRemainder === 0) { + return ''; + } + if (!packInputAs4) { + throw new Error(`packInputAs4 ${packInputAs4} is not true.`); + } + let calcStr = ''; + if (aComponents === 1) { + calcStr += 'dotProd = dotProd'; + for (let i = 0; i < inputChannelsRemainder; i++) { + calcStr += ` + + ${dy.getByOffset(`x_offset + ${i}`)} * ${w.getByOffset(`w_offset + ${i}`)}`; + } + calcStr += ';'; + } else if (aComponents === 2) { + if (inputChannelsRemainder !== 2) { + throw new Error(`Invalid inputChannelsRemainder ${inputChannelsRemainder}.`); + } + calcStr += ` + let xValue = ${dy.getByOffset('x_offset')}; + let wValue = ${w.getByOffset('w_offset')}; + dotProd = dotProd + dot(xValue, wValue);`; + } + return calcStr; + }; const codeSnippet = ` let outputIndices = ${output.offsetToIndices(`global_idx * ${components}`)}; let batch = ${output.indicesGet('outputIndices', 0)}; @@ -148,7 +203,12 @@ export const createConvTranspose2DProgramInfo = ( // Convolve dy(?, ?, d2) with w(:, :, d1, d2) to compute dx(xR, xC, d1). // ? = to be determined. : = across all values in that axis. var dotProd = ${output.type.value}(0.0); - for (var wR: u32 = 0; wR < uniforms.effective_filter_dims.x; wR = wR + 1) { + var wR: u32 = 0; + if (uniforms.dilations.x == 1) { + // Minimum wR >= 0 that satisfies (dyRCorner + wR) % (uniforms.strides.x) == 0 + wR = u32(((dyRCorner + i32(uniforms.strides.x) - 1) / i32(uniforms.strides.x)) * i32(uniforms.strides.x) - dyRCorner); + } + for (; wR < uniforms.effective_filter_dims.x; wR = wR + 1) { if (wR % uniforms.dilations.x != 0) { continue; } @@ -158,10 +218,13 @@ export const createConvTranspose2DProgramInfo = ( wRPerm < 0) { continue; } - wR = wR + uniforms.strides[0] - 1; let idyR: u32 = u32(dyR); - - for (var wC: u32 = 0; wC < uniforms.effective_filter_dims.y; wC = wC + 1) { + var wC: u32 = 0; + if (uniforms.dilations.y == 1) { + // Minimum wC >= 0 that satisfies (dyCCorner + wC) % (uniforms.strides.y) == 0 + wC = u32(((dyCCorner + i32(uniforms.strides.y) - 1) / i32(uniforms.strides.y)) * i32(uniforms.strides.y) - dyCCorner); + } + for (; wC < uniforms.effective_filter_dims.y; wC = wC + 1) { if (wC % uniforms.dilations.y != 0) { continue; } @@ -171,21 +234,24 @@ export const createConvTranspose2DProgramInfo = ( fract(dyC) > 0.0 || wCPerm < 0) { continue; } - wC = wC + uniforms.strides.y - 1; let idyC: u32 = u32(dyC); var inputChannel = groupId * uniforms.input_channels_per_group; - for (var d2: u32 = 0; d2 < uniforms.input_channels_per_group; d2 = d2 + ${aComponents}) { - let xValue = ${ - isChannelsLast - ? 
dy.getByOffset( - `${dy.indicesToOffset(`${dy.type.indices}(batch, idyR, idyC, inputChannel)`)} / ${aComponents}`, - ) - : dy.get('batch', 'inputChannel', 'idyR', 'idyC') - }; + ${ + packInputAs4 + ? ` + var x_offset = ${dy.indicesToOffset(`${dy.type.indices}(batch, idyR, idyC, inputChannel)`)} / ${aComponents}; + var w_offset = ${w.indicesToOffset(`${w.type.indices}(wRPerm, wCPerm, inputChannel, wOutChannel)`)} / ${bComponents}; + ` + : '' + } + for (var d2: u32 = 0; d2 < uniforms.input_channels_per_group; d2 = d2 + ${packInputAs4 ? 4 : aComponents}) { ${calculateResult()} - inputChannel = inputChannel + ${aComponents}; + inputChannel = inputChannel + ${packInputAs4 ? 4 : aComponents}; } + ${calculateRemainder()} + wC = wC + uniforms.strides.y - 1; } + wR = wR + uniforms.strides[0] - 1; } let value = dotProd${hasBias ? ` + bias[d1 / ${components}]` : ''}; ${output.setByOffset('global_idx', 'value')}; @@ -201,7 +267,7 @@ export const createConvTranspose2DProgramInfo = ( return { name: 'ConvTranspose2D', shaderCache: { - hint: `${attributes.cacheKey};${aComponents}${bComponents}${components}${outputChannelsPerGroup === 1}`, + hint: `${attributes.cacheKey};${aComponents}${bComponents}${components}${outputChannelsPerGroup === 1}${inputChannelsRemainder}`, inputDependencies, }, getRunData: () => ({ diff --git a/js/web/script/build.ts b/js/web/script/build.ts index 6e8c3f0df8192..6006de62b41b6 100644 --- a/js/web/script/build.ts +++ b/js/web/script/build.ts @@ -145,51 +145,15 @@ async function minifyWasmModuleJsForBrowser(filepath: string): Promise { `new Worker(import.meta.url.startsWith('file:')?new URL(BUILD_DEFS.BUNDLE_FILENAME, import.meta.url):new URL(import.meta.url),`, ); - // Find the first and the only occurrence of minified function implementation of "_emscripten_thread_set_strongref": - // ```js - // _emscripten_thread_set_strongref: (thread) => { - // if (ENVIRONMENT_IS_NODE) { - // PThread.pthreads[thread].ref(); - // } - // } - // ``` - // - // It is minified to: (example) - // ```js - // function Pb(a){D&&N[a>>>0].ref()} - // ``` - - // The following code will look for the function name and mark the function call as pure, so that Terser will - // minify the code correctly. - - const markedAsPure = []; - // First, try if we are working on the original (not minified) source file. This is when we are working with the - // debug build. - const isOriginal = contents.includes('PThread.pthreads[thread].ref()'); - if (isOriginal) { - markedAsPure.push('PThread.pthreads[thread].ref'); - } else { - // If it is not the original source file, we need to find the minified function call. - const matches = [...contents.matchAll(/\{[_a-zA-Z][_a-zA-Z0-9]*&&([_a-zA-Z][_a-zA-Z0-9]*\[.+?]\.ref)\(\)}/g)]; - if (matches.length !== 1) { - throw new Error( - `Unexpected number of matches for minified "PThread.pthreads[thread].ref()" in "${filepath}": ${ - matches.length - }.`, - ); - } - // matches[0] is the first and the only match. - // matches[0][0] is the full matched string and matches[0][1] is the first capturing group. - markedAsPure.push(matches[0][1]); - } - + // Use terser to minify the code with special configurations: + // - use `global_defs` to define `process` and `globalThis.process` as `undefined`, so terser can tree-shake the + // Node.js specific code. 
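+  //
+  // Illustration only (assumed behavior of terser's documented `global_defs` option, not code in this repo):
+  // with `process` defined as `undefined`, a Node.js-only guard such as
+  // ```js
+  // if (typeof process !== 'undefined' && process.versions) { /* Node.js specific path */ }
+  // ```
+  // folds to a constant-false test during `compress` (since `typeof undefined` evaluates to 'undefined'),
+  // so the whole branch is removed from the browser bundle.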
const terser = await import('terser'); const result = await terser.minify(contents, { module: true, compress: { passes: 2, global_defs: { process: undefined, 'globalThis.process': undefined }, - pure_funcs: markedAsPure, }, }); diff --git a/js/web/test/data/ops/conv-transpose.jsonc b/js/web/test/data/ops/conv-transpose.jsonc index f827601b3a89c..a6a799dccee86 100644 --- a/js/web/test/data/ops/conv-transpose.jsonc +++ b/js/web/test/data/ops/conv-transpose.jsonc @@ -458,6 +458,152 @@ } ] }, + { + "name": "ConvTranspose with output channels = 1", + "operator": "ConvTranspose", + "inputShapeDefinitions": "rankOnly", + "opset": { "domain": "", "version": 17 }, + "attributes": [ + { "name": "kernel_shape", "data": [2, 2], "type": "ints" }, + { "name": "strides", "data": [2, 2], "type": "ints" } + ], + "cases": [ + { + "name": "inChannels = 5", + "inputs": [ + { + "data": [ + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, + 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45 + ], + "dims": [1, 5, 3, 3], + "type": "float32" + }, + { + "data": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 1, 2, 3, 4, 5, 6, 7, 8], + "dims": [5, 1, 2, 2], + "type": "float32" + }, + { + "data": [2], + "dims": [1], + "type": "float32" + } + ], + "outputs": [ + { + "data": [ + 437, 532, 458, 558, 479, 584, 627, 722, 658, 758, 689, 794, 500, 610, 521, 636, 542, 662, 720, 830, 751, + 866, 782, 902, 563, 688, 584, 714, 605, 740, 813, 938, 844, 974, 875, 1010 + ], + "dims": [1, 1, 6, 6], + "type": "float32" + } + ] + }, + { + "name": "inChannels = 6", + "inputs": [ + { + "data": [ + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, + 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 1, 2, 3, 4, 5, 6, 7, 8, 9 + ], + "dims": [1, 6, 3, 3], + "type": "float32" + }, + { + "data": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4], + "dims": [6, 1, 2, 2], + "type": "float32" + }, + { + "data": [2], + "dims": [1], + "type": "float32" + } + ], + "outputs": [ + { + "data": [ + 438, 534, 460, 562, 482, 590, 630, 726, 664, 766, 698, 806, 504, 618, 526, 646, 548, 674, 732, 846, 766, + 886, 800, 926, 570, 702, 592, 730, 614, 758, 834, 966, 868, 1006, 902, 1046 + ], + "dims": [1, 1, 6, 6], + "type": "float32" + } + ] + }, + { + "name": "inChannels = 7", + "inputs": [ + { + "data": [ + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, + 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, + 14, 15, 16, 17, 18 + ], + "dims": [1, 7, 3, 3], + "type": "float32" + }, + { + "data": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8], + "dims": [7, 1, 2, 2], + "type": "float32" + }, + { + "data": [2], + "dims": [1], + "type": "float32" + } + ], + "outputs": [ + { + "data": [ + 488, 594, 515, 628, 542, 662, 700, 806, 741, 854, 782, 902, 569, 696, 596, 730, 623, 764, 823, 950, 864, + 998, 905, 1046, 650, 798, 677, 832, 704, 866, 946, 1094, 987, 1142, 1028, 1190 + ], + "dims": [1, 1, 6, 6], + "type": "float32" + } + ] + }, + { + "name": "inChannels = 8", + "inputs": [ + { + "data": [ + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, + 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, + 14, 15, 16, 17, 18, 1, 2, 3, 4, 5, 6, 7, 8, 9 + 
], + "dims": [1, 8, 3, 3], + "type": "float32" + }, + { + "data": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4], + "dims": [8, 1, 2, 2], + "type": "float32" + }, + { + "data": [2], + "dims": [1], + "type": "float32" + } + ], + "outputs": [ + { + "data": [ + 489, 596, 517, 632, 545, 668, 703, 810, 747, 862, 791, 914, 573, 704, 601, 740, 629, 776, 835, 966, 879, + 1018, 923, 1070, 657, 812, 685, 848, 713, 884, 967, 1122, 1011, 1174, 1055, 1226 + ], + "dims": [1, 1, 6, 6], + "type": "float32" + } + ] + } + ] + }, { "name": "ConvTranspose without bias addition C", "operator": "ConvTranspose", diff --git a/js/web/test/e2e/exports/testcases/vite-default/package-lock.json b/js/web/test/e2e/exports/testcases/vite-default/package-lock.json index 96c19af9479e4..891b40710ff99 100644 --- a/js/web/test/e2e/exports/testcases/vite-default/package-lock.json +++ b/js/web/test/e2e/exports/testcases/vite-default/package-lock.json @@ -12,7 +12,7 @@ }, "devDependencies": { "@vitejs/plugin-vue": "^5.2.1", - "vite": "^6.0.5" + "vite": "^6.0.11" } }, "node_modules/@babel/helper-string-parser": { @@ -1069,9 +1069,9 @@ } }, "node_modules/vite": { - "version": "6.0.7", - "resolved": "https://registry.npmjs.org/vite/-/vite-6.0.7.tgz", - "integrity": "sha512-RDt8r/7qx9940f8FcOIAH9PTViRrghKaK2K1jY3RaAURrEUbm9Du1mJ72G+jlhtG3WwodnfzY8ORQZbBavZEAQ==", + "version": "6.0.11", + "resolved": "https://registry.npmjs.org/vite/-/vite-6.0.11.tgz", + "integrity": "sha512-4VL9mQPKoHy4+FE0NnRE/kbY51TOfaknxAjt3fJbGJxhIpBZiqVzlZDEesWWsuREXHwNdAoOFZ9MkPEVXczHwg==", "dev": true, "license": "MIT", "dependencies": { diff --git a/js/web/test/e2e/exports/testcases/vite-default/package.json b/js/web/test/e2e/exports/testcases/vite-default/package.json index 7a1f370885bf4..9e204875a1d01 100644 --- a/js/web/test/e2e/exports/testcases/vite-default/package.json +++ b/js/web/test/e2e/exports/testcases/vite-default/package.json @@ -13,6 +13,6 @@ }, "devDependencies": { "@vitejs/plugin-vue": "^5.2.1", - "vite": "^6.0.5" + "vite": "^6.0.11" } } diff --git a/js/web/test/e2e/exports/testcases/vite-default/src/components/onnx-helper.js b/js/web/test/e2e/exports/testcases/vite-default/src/components/onnx-helper.js index f6b458ce55683..7272ee7371057 100644 --- a/js/web/test/e2e/exports/testcases/vite-default/src/components/onnx-helper.js +++ b/js/web/test/e2e/exports/testcases/vite-default/src/components/onnx-helper.js @@ -1,14 +1,13 @@ import * as ort from 'onnxruntime-web'; -// The following line uses Vite's "Explicit URL Imports" feature to load the wasm files as asset. +// The following line uses Vite's "Explicit URL Imports" feature to load the wasm file as an asset. // // see https://vite.dev/guide/assets.html#explicit-url-imports // import wasmFileUrl from '/node_modules/onnxruntime-web/dist/ort-wasm-simd-threaded.jsep.wasm?url'; -import mjsFileUrl from '/node_modules/onnxruntime-web/dist/ort-wasm-simd-threaded.jsep.mjs?url'; -// wasmFileUrl is a string that contains the URL of the wasm file. -ort.env.wasm.wasmPaths = { wasm: wasmFileUrl, mjs: mjsFileUrl }; +// wasmFileUrl is the URL of the wasm file. Vite will make sure it's available in both development and production. 
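A note on the ConvTranspose cases added above: with kernel_shape = strides = [2, 2], every input pixel maps to a disjoint 2x2 block of the output, so the expected data can be re-derived with a few lines of NumPy. A minimal sketch for the "inChannels = 5" case (the helper structure here is ours, not part of the test suite):

import numpy as np

# "inChannels = 5": input 1..45, weights [5, 1, 2, 2], bias 2, strides [2, 2]
x = np.arange(1, 46, dtype=np.float32).reshape(1, 5, 3, 3)
w = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 1, 2, 3, 4, 5, 6, 7, 8],
             dtype=np.float32).reshape(5, 1, 2, 2)
out = np.full((1, 1, 6, 6), 2.0, dtype=np.float32)  # start from the bias
for i in range(3):
    for j in range(3):
        # each input pixel contributes one disjoint 2x2 output block
        out[0, 0, 2 * i:2 * i + 2, 2 * j:2 * j + 2] += np.einsum(
            "c,ckl->kl", x[0, :, i, j], w[:, 0])
print(out.flatten()[:6])  # [437. 532. 458. 558. 479. 584.], matching the jsonc data

The remaining cases (inChannels = 6, 7, 8) follow from the same scatter, only with more channels summed per block.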
+ort.env.wasm.wasmPaths = { wasm: wasmFileUrl }; // Model data for "test_abs/model.onnx" const testModelData = diff --git a/onnxruntime/__init__.py b/onnxruntime/__init__.py index 9d533af616288..c874df8153c3d 100644 --- a/onnxruntime/__init__.py +++ b/onnxruntime/__init__.py @@ -7,6 +7,7 @@ For more information on ONNX Runtime, please see `aka.ms/onnxruntime `_ or the `Github project `_. """ + __version__ = "1.21.0" __author__ = "Microsoft" @@ -20,33 +21,35 @@ # meaningful messages to the user. # the saved exception is raised after device version validation. try: - from onnxruntime.capi._pybind_state import ExecutionMode # noqa: F401 - from onnxruntime.capi._pybind_state import ExecutionOrder # noqa: F401 - from onnxruntime.capi._pybind_state import GraphOptimizationLevel # noqa: F401 - from onnxruntime.capi._pybind_state import LoraAdapter # noqa: F401 - from onnxruntime.capi._pybind_state import ModelMetadata # noqa: F401 - from onnxruntime.capi._pybind_state import NodeArg # noqa: F401 - from onnxruntime.capi._pybind_state import OrtAllocatorType # noqa: F401 - from onnxruntime.capi._pybind_state import OrtArenaCfg # noqa: F401 - from onnxruntime.capi._pybind_state import OrtMemoryInfo # noqa: F401 - from onnxruntime.capi._pybind_state import OrtMemType # noqa: F401 - from onnxruntime.capi._pybind_state import OrtSparseFormat # noqa: F401 - from onnxruntime.capi._pybind_state import RunOptions # noqa: F401 - from onnxruntime.capi._pybind_state import SessionIOBinding # noqa: F401 - from onnxruntime.capi._pybind_state import SessionOptions # noqa: F401 - from onnxruntime.capi._pybind_state import create_and_register_allocator # noqa: F401 - from onnxruntime.capi._pybind_state import create_and_register_allocator_v2 # noqa: F401 - from onnxruntime.capi._pybind_state import disable_telemetry_events # noqa: F401 - from onnxruntime.capi._pybind_state import enable_telemetry_events # noqa: F401 - from onnxruntime.capi._pybind_state import get_all_providers # noqa: F401 - from onnxruntime.capi._pybind_state import get_available_providers # noqa: F401 - from onnxruntime.capi._pybind_state import get_build_info # noqa: F401 - from onnxruntime.capi._pybind_state import get_device # noqa: F401 - from onnxruntime.capi._pybind_state import get_version_string # noqa: F401 - from onnxruntime.capi._pybind_state import has_collective_ops # noqa: F401 - from onnxruntime.capi._pybind_state import set_default_logger_severity # noqa: F401 - from onnxruntime.capi._pybind_state import set_default_logger_verbosity # noqa: F401 - from onnxruntime.capi._pybind_state import set_seed # noqa: F401 + from onnxruntime.capi._pybind_state import ( + ExecutionMode, # noqa: F401 + ExecutionOrder, # noqa: F401 + GraphOptimizationLevel, # noqa: F401 + LoraAdapter, # noqa: F401 + ModelMetadata, # noqa: F401 + NodeArg, # noqa: F401 + OrtAllocatorType, # noqa: F401 + OrtArenaCfg, # noqa: F401 + OrtMemoryInfo, # noqa: F401 + OrtMemType, # noqa: F401 + OrtSparseFormat, # noqa: F401 + RunOptions, # noqa: F401 + SessionIOBinding, # noqa: F401 + SessionOptions, # noqa: F401 + create_and_register_allocator, # noqa: F401 + create_and_register_allocator_v2, # noqa: F401 + disable_telemetry_events, # noqa: F401 + enable_telemetry_events, # noqa: F401 + get_all_providers, # noqa: F401 + get_available_providers, # noqa: F401 + get_build_info, # noqa: F401 + get_device, # noqa: F401 + get_version_string, # noqa: F401 + has_collective_ops, # noqa: F401 + set_default_logger_severity, # noqa: F401 + set_default_logger_verbosity, # noqa: 
F401 + set_seed, # noqa: F401 + ) import_capi_exception = None except Exception as e: @@ -57,12 +60,14 @@ if import_capi_exception: raise import_capi_exception -from onnxruntime.capi.onnxruntime_inference_collection import AdapterFormat # noqa: F401 -from onnxruntime.capi.onnxruntime_inference_collection import InferenceSession # noqa: F401 -from onnxruntime.capi.onnxruntime_inference_collection import IOBinding # noqa: F401 -from onnxruntime.capi.onnxruntime_inference_collection import OrtDevice # noqa: F401 -from onnxruntime.capi.onnxruntime_inference_collection import OrtValue # noqa: F401 -from onnxruntime.capi.onnxruntime_inference_collection import SparseTensor # noqa: F401 +from onnxruntime.capi.onnxruntime_inference_collection import ( + AdapterFormat, # noqa: F401 + InferenceSession, # noqa: F401 + IOBinding, # noqa: F401 + OrtDevice, # noqa: F401 + OrtValue, # noqa: F401 + SparseTensor, # noqa: F401 +) # TODO: thiagofc: Temporary experimental namespace for new PyTorch front-end try: # noqa: SIM105 diff --git a/onnxruntime/contrib_ops/cpu/bert/gqa_attention_base.h b/onnxruntime/contrib_ops/cpu/bert/gqa_attention_base.h index ccaeb6654e286..abb24e20a6178 100644 --- a/onnxruntime/contrib_ops/cpu/bert/gqa_attention_base.h +++ b/onnxruntime/contrib_ops/cpu/bert/gqa_attention_base.h @@ -75,6 +75,7 @@ class GQAAttentionBase { int seqlen_present_kv_cache = static_cast(present_key->Shape().GetDims()[2]); // Compute the attention score. + // TODO(fajin): type depends on kernel supportability size_t bytes = SafeInt(batch_size) * num_heads_ * sequence_length * seqlen_present_kv_cache * sizeof(float); auto attention_probs = allocator->Alloc(bytes); BufferUniquePtr scratch_buffer(attention_probs, BufferDeleter(allocator)); @@ -198,6 +199,11 @@ class GQAAttentionBase { math::GemmEx(CblasNoTrans, CblasTrans, sequence_length, total_seqlen, head_size, alpha, q, static_cast(head_size), k, static_cast(head_size), 0.0f /*bata*/, output, static_cast(present_buffer_sequence_length), nullptr); + // TODO(fajin): update later + // } else if (MlasHGemmSupported(CblasNoTrans, CblasTrans)) { + // MlasGemm(CblasNoTrans, CblasTrans, sequence_length, total_seqlen, head_size, + // q, static_cast(head_size), k, static_cast(head_size), output, + // static_cast(present_buffer_sequence_length), alpha, 0.0f /*beta*/, nullptr); } else { size_t bytes = head_size * (sequence_length + total_seqlen) * sizeof(float); auto q_k_fp32 = allocator->Alloc(bytes); diff --git a/onnxruntime/contrib_ops/cuda/bert/attention_prepare_qkv.cu b/onnxruntime/contrib_ops/cuda/bert/attention_prepare_qkv.cu index c8c0191967d40..282ba2403b135 100644 --- a/onnxruntime/contrib_ops/cuda/bert/attention_prepare_qkv.cu +++ b/onnxruntime/contrib_ops/cuda/bert/attention_prepare_qkv.cu @@ -125,42 +125,31 @@ Status PrepareQkv_Attention(contrib::AttentionParameters& parameters, bool use_fused_kernel = (nullptr != fused_runner && !parameters.is_unidirectional); bool use_fused_causal = (nullptr != fused_runner && parameters.is_unidirectional); - if (data.bias == nullptr) { - assert(nullptr == fused_runner); - // For quantized attention, bias has been added so only need transpose here. - // gemm_buffer should be BxSx3xNxH => qkv: 3xBxNxSxH - assert(qk_head_size == v_head_size); - int matrix_to_trans = (past_present_share_buffer ? 
1 : 3);
-    ORT_RETURN_IF_ERROR(LaunchTransQkv(stream, matrix_to_trans, sequence_length, batch_size, qk_head_size, num_heads,
-                                       max_threads_per_block, false, data.gemm_buffer, qkv, 3));
-    data.qkv_format = AttentionQkvFormat::Q_K_V_BNSH;
-  } else {
-    // For fused TRT attention, transpose qkv to BxSxNx3xH (format 2)
-    // For flash or memory efficient attention, transpose to 3xBxSxNxH (format 3)
-    // For unfused kernel, transpose to 3xBxNxSxH (format 1)
-    // For fused causal kernel, use format 1 since we need have K and V to update present state,
-    // at the same time, we update gemm_buffer BxSx3xNxH with bias which is used as input for fused causal kernel.
-    const int format = (use_fused_kernel ? 2 : (use_flash_or_efficient_attention ? 3 : 1));
-    data.qkv_format = use_fused_kernel
-                          ? AttentionQkvFormat::QKV_BSN3H
-                          : (use_flash_or_efficient_attention
-                                 ? AttentionQkvFormat::Q_K_V_BSNH
-                                 : (use_fused_causal
-                                        ? AttentionQkvFormat::Q_K_V_BNSH_QKV_BS3NH
-                                        : AttentionQkvFormat::Q_K_V_BNSH));
-
-    // For fused causal, we will update gemm_buffer with bias directly.
-    T* qkv_add_bias = use_fused_causal ? data.gemm_buffer : nullptr;
-
-    int matrix_to_transpose = ((format == AttentionQkvFormat::Q_K_V_BNSH && past_present_share_buffer) ? 1 : 3);
-    // format 1: BxSx(NH + NH + NH_v) => BxNxSxH + BxNxSxH + BxNxSxH_v
-    // format 2: BxSx(NH + NH + NH) => BxSxNx(H + H + H)
-    LaunchAddBiasTranspose(stream, matrix_to_transpose, format, max_threads_per_block,
-                           batch_size, sequence_length, num_heads, qk_head_size,
-                           data.gemm_buffer, data.bias, qkv, true, v_head_size, qkv_add_bias,
-                           3, parameters.do_rotary, parameters.rotary_embedding,
-                           parameters.past_sequence_length);
-  }
+  // For fused TRT attention, transpose qkv to BxSxNx3xH (format 2)
+  // For flash or memory efficient attention, transpose to 3xBxSxNxH (format 3)
+  // For unfused kernel, transpose to 3xBxNxSxH (format 1)
+  // For the fused causal kernel, use format 1 since we need to have K and V to update the present state;
+  // at the same time, we update gemm_buffer BxSx3xNxH with bias, which is used as input for the fused causal kernel.
+  const int format = (use_fused_kernel ? 2 : (use_flash_or_efficient_attention ? 3 : 1));
+  data.qkv_format = use_fused_kernel
+                        ? AttentionQkvFormat::QKV_BSN3H
+                        : (use_flash_or_efficient_attention
+                               ? AttentionQkvFormat::Q_K_V_BSNH
+                               : (use_fused_causal
+                                      ? AttentionQkvFormat::Q_K_V_BNSH_QKV_BS3NH
+                                      : AttentionQkvFormat::Q_K_V_BNSH));
+
+  // For fused causal, we will update gemm_buffer with bias directly.
+  T* qkv_add_bias = use_fused_causal ? data.gemm_buffer : nullptr;
+
+  int matrix_to_transpose = ((format == AttentionQkvFormat::Q_K_V_BNSH && past_present_share_buffer) ?
1 : 3); + // format 1: BxSx(NH + NH + NH_v) => BxNxSxH + BxNxSxH + BxNxSxH_v + // format 2: BxSx(NH + NH + NH) => BxSxNx(H + H + H) + LaunchAddBiasTranspose(stream, matrix_to_transpose, format, max_threads_per_block, + batch_size, sequence_length, num_heads, qk_head_size, + data.gemm_buffer, data.bias, qkv, true, v_head_size, qkv_add_bias, + 3, parameters.do_rotary, parameters.rotary_embedding, + parameters.past_sequence_length); return Status::OK(); } diff --git a/onnxruntime/contrib_ops/cuda/bert/skip_layer_norm.cc b/onnxruntime/contrib_ops/cuda/bert/skip_layer_norm.cc index 3299bc2cb11de..428b903c03682 100644 --- a/onnxruntime/contrib_ops/cuda/bert/skip_layer_norm.cc +++ b/onnxruntime/contrib_ops/cuda/bert/skip_layer_norm.cc @@ -101,6 +101,7 @@ Status SkipLayerNorm::ComputeInternal(OpKernelContext* ctx) const (double)epsilon_, // epsilon reinterpret_cast(gamma->Data()), // gamma (beta != nullptr) ? reinterpret_cast(beta->Data()) : nullptr, // beta + 0, // no broadcast for gamma/beta reinterpret_cast(skip->Data()), // skip or residual to add (bias != nullptr) ? reinterpret_cast(bias->Data()) : nullptr, // bias to add sum_output != nullptr ? reinterpret_cast(sum_output->MutableData()) : nullptr); diff --git a/onnxruntime/contrib_ops/webgpu/bert/attention.cc b/onnxruntime/contrib_ops/webgpu/bert/attention.cc index 86dc959cf2e83..568e75b38a98f 100644 --- a/onnxruntime/contrib_ops/webgpu/bert/attention.cc +++ b/onnxruntime/contrib_ops/webgpu/bert/attention.cc @@ -69,10 +69,10 @@ Status TransferBSDToBNSH(onnxruntime::webgpu::ComputeContext& context, int num_h return context.RunProgram(program); }; -void InitVarStub(std::ostringstream& ss, const Tensor* seqlen_k, bool is_first_prompt) { +void InitVarStub(std::ostringstream& ss, const Tensor* seqlen_k) { if (seqlen_k != nullptr) { ss << "total_sequence_length = u32(seqlen_k[batch_idx]) + 1;\n"; - ss << "var past_sequence_length: u32 = " << (is_first_prompt ? "0" : "total_sequence_length - sequence_length") << ";\n"; + ss << "var past_sequence_length: u32 = select(total_sequence_length - sequence_length, 0u, uniforms.is_first_prompt > 0);\n"; } else { ss << "let past_sequence_length = uniforms.past_sequence_length;\n"; } @@ -106,7 +106,7 @@ Status AttentionProbsProgram::GenerateShaderCode(ShaderHelper& shader) const { << "let sequence_length = uniforms.M;\n" << "var total_sequence_length = uniforms.N;\n"; std::ostringstream oss; - InitVarStub(oss, seqlen_k_, is_first_prompt_); + InitVarStub(oss, seqlen_k_); shader.MainFunctionBody() << oss.str(); shader.MainFunctionBody() << "let kOffset = (workgroup_id.z / " << n_reps_ << ") * uniforms.kv_sequence_length * uniforms.K;\n"; if (has_present_key_) { @@ -121,7 +121,7 @@ Status AttentionProbsProgram::GenerateShaderCode(ShaderHelper& shader) const { " if (n + local_id.y < uniforms.N && w + local_id.x < uniforms.K) {\n" " var idx = TILE_SIZE * local_id.y + local_id.x;\n"; - if ((feed_past_key_ && has_present_key_) || past_present_share_buffer_) { + if ((feed_past_key_ && has_present_key_) || (past_present_share_buffer_ && !is_first_prompt_)) { shader.MainFunctionBody() << " if (n + local_id.y < past_sequence_length) {\n" << " let pastKeyOffset = (workgroup_id.z / " << n_reps_ << ") * uniforms.past_sequence_length * uniforms.K;\n" << " tileK[idx] = " << (past_present_share_buffer_ ? 
"present_key" : "past_key") << "[pastKeyOffset + (n + local_id.y) * uniforms.K + w + local_id.x];\n" @@ -213,7 +213,8 @@ Status ComputeAttentionProbs(onnxruntime::webgpu::ComputeContext& context, int o {static_cast(past_sequence_length)}, {static_cast(parameters.kv_sequence_length_)}, {static_cast(seqlen_k == nullptr ? total_sequence_length : parameters.seqlen_present_kv_cache_)}, - {static_cast(parameters.n_reps)}}) + {static_cast(parameters.n_reps)}, + {static_cast(parameters.is_first_prompt_ ? 1 : 0)}}) .SetOverridableConstants({{static_cast(tile_size)}}); return context.RunProgram(program); @@ -231,7 +232,7 @@ Status InPlaceSoftmaxProgram::GenerateShaderCode(ShaderHelper& shader) const { << "let sequence_length = uniforms.sequence_length;\n" << "var total_sequence_length = uniforms.total_sequence_length_comp * " << components_ << ";\n"; std::ostringstream oss; - InitVarStub(oss, seqlen_k_, is_first_prompt_); + InitVarStub(oss, seqlen_k_); shader.MainFunctionBody() << oss.str() << "let local_offset = local_idx * uniforms.elements_per_thread;\n" << "let offset = (global_idx / " << work_group_size_ << ") * uniforms.total_sequence_length_comp + local_offset;\n" @@ -285,12 +286,12 @@ Status ComputeInPlaceSoftmax(onnxruntime::webgpu::ComputeContext& context, Tenso } const int elementsPerThread = (total_sequence_length_comp + work_group_size - 1) / work_group_size; - InPlaceSoftmaxProgram program{"InPlaceSoftmax", work_group_size, components, is_first_prompt, seqlen_k}; + InPlaceSoftmaxProgram program{"InPlaceSoftmax", work_group_size, components, seqlen_k}; if (seqlen_k != nullptr) { program.AddInput({seqlen_k, ProgramTensorMetadataDependency::TypeAndRank}); } program.AddOutputs({{probs, ProgramTensorMetadataDependency::TypeAndRank, components}}) - .CacheHint(work_group_size, is_first_prompt) + .CacheHint(work_group_size) .SetDispatchGroupSize(1, sequence_length, batch_size * num_heads) .SetWorkgroupSize(work_group_size) .AddUniformVariables({{static_cast(batch_size)}, @@ -298,7 +299,8 @@ Status ComputeInPlaceSoftmax(onnxruntime::webgpu::ComputeContext& context, Tenso {static_cast(past_sequence_length)}, {static_cast(sequence_length)}, {static_cast(total_sequence_length_comp)}, - {static_cast(elementsPerThread)}}); + {static_cast(elementsPerThread)}, + {static_cast(is_first_prompt ? 1 : 0)}}); return context.RunProgram(program); } @@ -327,7 +329,7 @@ Status VxAttentionScoreProgram::GenerateShaderCode(ShaderHelper& shader) const { << "let sequence_length = uniforms.M;\n" << "var total_sequence_length = uniforms.K;\n"; std::ostringstream oss; - InitVarStub(oss, seqlen_k_, is_first_prompt_); + InitVarStub(oss, seqlen_k_); shader.MainFunctionBody() << oss.str(); shader.MainFunctionBody() << "let vOffset = (workgroup_id.z / " << n_reps_ << ") * uniforms.N * uniforms.kv_sequence_length + n;\n"; if (has_present_value_) { @@ -342,12 +344,12 @@ Status VxAttentionScoreProgram::GenerateShaderCode(ShaderHelper& shader) const { << " if (n < uniforms.N && w + local_id.y < uniforms.K) {\n" << " var idx = TILE_SIZE * local_id.y + local_id.x;\n"; - if ((feed_past_value_ && has_present_value_) || past_present_share_buffer_) { + if ((feed_past_value_ && has_present_value_) || (past_present_share_buffer_ && !is_first_prompt_)) { shader.MainFunctionBody() << " if (w + local_id.y < past_sequence_length) {\n" << " let pastValueOffset = (workgroup_id.z / " << n_reps_ << ") * uniforms.N * uniforms.past_sequence_length + n;\n" << " tileK[idx] = " << (past_present_share_buffer_ ? 
"present_value" : "past_value") << "[pastValueOffset + (w + local_id.y) * uniforms.N];\n" << " } else if (w + local_id.y - past_sequence_length < uniforms.kv_sequence_length) {\n" - << " tileK[idx] = v[vOffset + (w + local_id.y - uniforms.past_sequence_length) * uniforms.N];\n" + << " tileK[idx] = v[vOffset + (w + local_id.y - past_sequence_length) * uniforms.N];\n" << " }\n"; } else { shader.MainFunctionBody() << " if (w + local_id.y < uniforms.kv_sequence_length) {\n" @@ -425,7 +427,8 @@ Status ComputeVxAttentionScore(onnxruntime::webgpu::ComputeContext& context, int {static_cast(past_sequence_length)}, {static_cast(parameters.kv_sequence_length_)}, {static_cast(seqlen_k == nullptr ? total_sequence_length : parameters.seqlen_present_kv_cache_)}, - {static_cast(parameters.n_reps)}}) + {static_cast(parameters.n_reps)}, + {static_cast(parameters.is_first_prompt_)}}) .SetOverridableConstants({{static_cast(tile_size)}}); return context.RunProgram(program); diff --git a/onnxruntime/contrib_ops/webgpu/bert/attention.h b/onnxruntime/contrib_ops/webgpu/bert/attention.h index 03279fffbc3ef..164ea72b07d9d 100644 --- a/onnxruntime/contrib_ops/webgpu/bert/attention.h +++ b/onnxruntime/contrib_ops/webgpu/bert/attention.h @@ -49,7 +49,8 @@ class AttentionProbsProgram final : public Program { {"past_sequence_length", ProgramUniformVariableDataType::Uint32}, {"kv_sequence_length", ProgramUniformVariableDataType::Uint32}, {"present_sequence_length", ProgramUniformVariableDataType::Uint32}, - {"n_reps", ProgramUniformVariableDataType::Uint32}); + {"n_reps", ProgramUniformVariableDataType::Uint32}, + {"is_first_prompt", ProgramUniformVariableDataType::Uint32}); WEBGPU_PROGRAM_DEFINE_OVERRIDABLE_CONSTANTS({"TILE_SIZE", ProgramConstantDataType::Uint32}); @@ -67,8 +68,8 @@ class AttentionProbsProgram final : public Program { class InPlaceSoftmaxProgram final : public Program { public: - InPlaceSoftmaxProgram(const std::string& kernel_name, int work_group_size, int components, bool is_first_prompt, const Tensor* seqlen_k = nullptr) - : Program{kernel_name}, work_group_size_(work_group_size), components_(components), seqlen_k_(seqlen_k), is_first_prompt_(is_first_prompt) { + InPlaceSoftmaxProgram(const std::string& kernel_name, int work_group_size, int components, const Tensor* seqlen_k = nullptr) + : Program{kernel_name}, work_group_size_(work_group_size), components_(components), seqlen_k_(seqlen_k) { } Status GenerateShaderCode(ShaderHelper& sh) const override; @@ -78,13 +79,13 @@ class InPlaceSoftmaxProgram final : public Program { {"past_sequence_length", ProgramUniformVariableDataType::Uint32}, {"sequence_length", ProgramUniformVariableDataType::Uint32}, {"total_sequence_length_comp", ProgramUniformVariableDataType::Uint32}, - {"elements_per_thread", ProgramUniformVariableDataType::Uint32}); + {"elements_per_thread", ProgramUniformVariableDataType::Uint32}, + {"is_first_prompt", ProgramUniformVariableDataType::Uint32}); private: int work_group_size_; int components_; const Tensor* seqlen_k_; - bool is_first_prompt_; }; class VxAttentionScoreProgram final : public Program { @@ -104,7 +105,8 @@ class VxAttentionScoreProgram final : public Program { {"past_sequence_length", ProgramUniformVariableDataType::Uint32}, {"kv_sequence_length", ProgramUniformVariableDataType::Uint32}, {"present_sequence_length", ProgramUniformVariableDataType::Uint32}, - {"n_reps", ProgramUniformVariableDataType::Uint32}); + {"n_reps", ProgramUniformVariableDataType::Uint32}, + {"is_first_prompt", 
ProgramUniformVariableDataType::Uint32}); WEBGPU_PROGRAM_DEFINE_OVERRIDABLE_CONSTANTS({"TILE_SIZE", ProgramConstantDataType::Uint32}); diff --git a/onnxruntime/contrib_ops/webgpu/bert/skip_layer_norm.cc b/onnxruntime/contrib_ops/webgpu/bert/skip_layer_norm.cc index fe541f58d34ec..a1840257d734f 100644 --- a/onnxruntime/contrib_ops/webgpu/bert/skip_layer_norm.cc +++ b/onnxruntime/contrib_ops/webgpu/bert/skip_layer_norm.cc @@ -116,7 +116,7 @@ Status SkipLayerNorm::ComputeInternal(onnxruntime::webgpu::ComputeCo auto* output = context.Output(0, x_shape); auto* input_skip_bias_sum = context.Output(3, x_shape); - size_t data_size = x_shape.Size(); + int64_t data_size = x_shape.Size(); if (data_size == 0) { return Status::OK(); } diff --git a/onnxruntime/contrib_ops/webgpu/quantization/matmul_nbits.cc b/onnxruntime/contrib_ops/webgpu/quantization/matmul_nbits.cc index 8abcd78bfff4c..90e6516ff45d1 100644 --- a/onnxruntime/contrib_ops/webgpu/quantization/matmul_nbits.cc +++ b/onnxruntime/contrib_ops/webgpu/quantization/matmul_nbits.cc @@ -65,25 +65,9 @@ Status MatMulNBitsProgram::GenerateShaderCode(ShaderHelper& shader) const { const uint32_t tile_size = WorkgroupSizeX() * components_b_ * 8; // each uint32 has 8 data. const uint32_t a_length_per_tile = tile_size / a.NumComponents(); const uint32_t blocks_per_tile = tile_size / block_size_; - if (tile_m_ == 1) { - shader.AdditionalImplementation() << "fn mm_readA(batch : u32, row : u32, col : u32) -> input_a_value_t {\n" - " if (col < uniforms.input_a_shape[2]) {\n" - << " return " << a.GetByIndices("input_a_indices_t(batch, row, col)") << ";\n" - << " } else {\n" - " return input_a_value_t(0);\n" - " }\n" - "}\n" - << "var sub_a: array;\n" - << "var inter_results: array, " << WorkgroupSizeY() << ">;\n"; - std::string offset = "workgroup_idx * " + std::to_string(WorkgroupSizeY()); - shader.MainFunctionBody() << " let output_indices = " << y.OffsetToIndices(offset) << ";\n" - << " let col = output_indices[2];\n" - " let row = output_indices[1];\n" - " let batch = output_indices[0];\n"; - } else { - ORT_ENFORCE(tile_m_ < WorkgroupSizeY(), "tile_m must be less than or equal to WorkgroupSizeY."); - ORT_ENFORCE(WorkgroupSizeX() == WorkgroupSizeY(), "WorkgroupSizeX must be equal to WorkgroupSizeY."); - + if (tile_m_ > 1 && use_subgroup_) { + ORT_ENFORCE(a.NumComponents() == 4, "input a's components must be equal to 4."); + ORT_ENFORCE(components_b_ == 4, "input b's components must be equal to 4."); shader.AdditionalImplementation() << "fn mm_readA(batch : u32, row : u32, col : u32) -> input_a_value_t {\n" " if (row < uniforms.input_a_shape[1] && col < uniforms.input_a_shape[2]) {\n" << " return " << a.GetByIndices("input_a_indices_t(batch, row, col)") << ";\n" @@ -91,125 +75,297 @@ Status MatMulNBitsProgram::GenerateShaderCode(ShaderHelper& shader) const { " return input_a_value_t(0);\n" " }\n" "}\n" - << "var sub_a: array," << tile_m_ << ">;\n" + << "var sub_b: array, " << WorkgroupSizeY() << ">;\n" + << "var sub_scale: array, " << WorkgroupSizeY() << ">;\n" << "var inter_results: array, " << WorkgroupSizeY() << ">," << tile_m_ << ">;\n"; shader.MainFunctionBody() << " let col = workgroup_id.x * " << WorkgroupSizeY() << ";\n" << " let row = workgroup_id.y * " << tile_m_ << ";\n" << " let batch = workgroup_id.z;\n"; - } - shader.MainFunctionBody() << " let n_blocks_per_col = uniforms.input_b_shape[1];\n" - << " let num_tiles = (n_blocks_per_col - 1) / " << blocks_per_tile << " + 1;\n" - // Loop over shared dimension. 
- << " for (var tile: u32 = 0; tile < num_tiles; tile += 1) {\n" - << " let a_col_start = tile * " << a_length_per_tile << ";\n" - << " // load one tile A data into shared memory.\n" - << " for (var a_offset = local_idx; a_offset < " << a_length_per_tile << "; a_offset += " << workgroup_size << ") {\n" - << " let a_col = a_col_start + a_offset;\n"; - if (tile_m_ == 1) { - shader.MainFunctionBody() << " sub_a[a_offset] = mm_readA(batch, row, a_col);\n"; - } else { + shader.MainFunctionBody() << " let n_blocks_per_col = uniforms.input_b_shape[1];\n" + << " let num_tiles = (n_blocks_per_col - 1) / " << blocks_per_tile << " + 1;\n" + // Loop over shared dimension. + << " for (var tile: u32 = 0; tile < num_tiles; tile += 1) {\n" + << " // load one tile B/scale data into shared memory.\n" + // Each thread processes one block. + " let b_col = col + local_id.y;\n" + << " let block = tile * " << blocks_per_tile << " + local_id.x;\n" + << " if (b_col < uniforms.input_b_shape[0] && block < n_blocks_per_col) {\n" + << " sub_b[local_id.y][local_id.x] = " << b.GetByIndices("input_b_indices_t(b_col, block, 0)") << ";\n" + << " sub_scale[local_id.y][local_id.x] = " << scales.GetByOffset("b_col * n_blocks_per_col + block") << ";\n" + << " } else {\n" + " sub_b[local_id.y][local_id.x] = input_b_value_t(0);\n" + " sub_scale[local_id.y][local_id.x] = output_value_t(0);\n" + " }\n" + " workgroupBarrier();\n" + << " var in_y = (local_idx % 32) / 4;\n" + " var in_x = (local_idx / 32) * 4 + local_idx % 4;\n" + << " var word_offset = (local_idx % 4) * " << block_size_ / a.NumComponents() << ";\n" + << " if (sg_size == 8u) {\n" + " in_y = local_idx % 8;\n" + " in_x = local_idx / 8;\n" + << " word_offset = 0u;\n" + " } else if (sg_size == 16u) {\n" + " in_y = (local_idx % 16) / 2;\n" + " in_x = (local_idx / 16) * 2 + local_idx % 2;\n" + << " word_offset = (local_idx % 2) * " << block_size_ / a.NumComponents() << ";\n" + << " } else if (sg_size == 32u) {\n" + " in_y = (local_idx % 32) / 4;\n" + " in_x = (local_idx / 32) * 4 + local_idx % 4;\n" + << " word_offset = (local_idx % 4) * " << block_size_ / a.NumComponents() << ";\n" + << " } else if (sg_size == 64u) {\n" + " in_y = local_idx / 8;\n" + " in_x = local_idx % 8;\n" + << " word_offset = (local_idx % 8) * " << block_size_ / a.NumComponents() << ";\n" + << " }\n"; + if (has_zero_points_) { + const auto& zero_points = shader.AddInput("zero_points", ShaderUsage::UseUniform); + shader.MainFunctionBody() << " let zero_point_bytes_per_col = (n_blocks_per_col + 1) / 2;\n" + " let zero_point_byte_count = b_col * zero_point_bytes_per_col + (block >> 0x1u);\n" + " let zero_point_word_index = zero_point_byte_count >> 0x2u;\n" + " let zero_point_byte_offset = zero_point_byte_count & 0x3u;\n" + " let zero_point_nibble_offset: u32 = block & 0x1u;\n" + " let zero_point_bits_offset = (zero_point_byte_offset << 3) + (zero_point_nibble_offset << 2);\n" + << " let zero_point_word = " << zero_points.GetByOffset("zero_point_word_index") << " >> zero_point_bits_offset;\n" + << " let zero_point = output_element_t((zero_point_word) & 0xFu);\n"; + } else { + // The default zero point is 8 for unsigned 4-bit quantization. 
+ shader.MainFunctionBody() << " let zero_point = output_element_t(8.0);\n"; + } + shader.MainFunctionBody() << " let scale = sub_scale[in_y][in_x];\n" + " let b_data = sub_b[in_y][in_x];\n"; + shader.MainFunctionBody() << " let a_col_start = tile * " << a_length_per_tile << ";\n"; for (uint32_t i = 0; i < tile_m_; i++) { - shader.MainFunctionBody() << " sub_a[" << i << "][a_offset] = mm_readA(batch, row + " << i << ", a_col);\n"; + shader.MainFunctionBody() << " let a_data" << i << " = mm_readA(batch, row + " << i << ", a_col_start + local_idx);\n"; } - } - shader.MainFunctionBody() << " }\n" - " workgroupBarrier();\n" - // Each thread processes one block. - " let b_row = col + local_id.y;\n" - << " let block = tile * " << blocks_per_tile << " + local_id.x;\n"; - if (has_zero_points_) { - const auto& zero_points = shader.AddInput("zero_points", ShaderUsage::UseUniform); - shader.MainFunctionBody() << " let zero_point_bytes_per_col = (n_blocks_per_col + 1) / 2;\n" - " let zero_point_byte_count = b_row * zero_point_bytes_per_col + (block >> 0x1u);\n" - " let zero_point_word_index = zero_point_byte_count >> 0x2u;\n" - " let zero_point_byte_offset = zero_point_byte_count & 0x3u;\n" - " let zero_point_nibble_offset: u32 = block & 0x1u;\n" - " let zero_point_bits_offset = (zero_point_byte_offset << 3) + (zero_point_nibble_offset << 2);\n" - << " let zero_point_word = " << zero_points.GetByOffset("zero_point_word_index") << " >> zero_point_bits_offset;\n" - << " let zero_point = output_element_t((zero_point_word) & 0xFu);\n"; - } else { - // The default zero point is 8 for unsigned 4-bit quantization. - shader.MainFunctionBody() << " let zero_point = output_element_t(8.0);\n"; - } - shader.MainFunctionBody() << " var scale = output_element_t(0);\n" - " var b_data = input_b_value_t(0);\n" - << " if (block < n_blocks_per_col) {\n" - << " scale = " << scales.GetByOffset("b_row * n_blocks_per_col + block") << ";\n" - << " b_data = " << b.GetByIndices("input_b_indices_t(b_row, block, 0)") << ";\n" - << " }\n" - << " var word_offset = local_id.x * " << block_size_ / a.NumComponents() << ";\n" - << " for (var i: u32 = 0; i < " << components_b_ << "; i++) {\n"; - shader.MainFunctionBody() << " let b_value = b_data"; - if (components_b_ > 1) { - shader.MainFunctionBody() << "[i]"; - } - shader.MainFunctionBody() << ";\n" - " let b_value_lower = unpack4xU8(b_value & 0x0F0F0F0Fu);\n" - " let b_value_upper = unpack4xU8((b_value >> 4) & 0x0F0F0F0Fu);\n" - " let b_quantized_values = mat2x4(output_element_t(b_value_lower[0]), output_element_t(b_value_upper[0]), output_element_t(b_value_lower[1]), output_element_t(b_value_upper[1]), output_element_t(b_value_lower[2]), output_element_t(b_value_upper[2]), output_element_t(b_value_lower[3]), output_element_t(b_value_upper[3]));\n" - " let b_dequantized_values = (b_quantized_values - mat2x4("; - for (int i = 0; i < 8; i++) { - shader.MainFunctionBody() << "zero_point"; - if (i < 7) { - shader.MainFunctionBody() << ", "; + + shader.MainFunctionBody() << " if (sg_size == 8u) {\n"; + shader.MainFunctionBody() << " for (var i: u32 = 0; i < 4; i++) {\n"; + shader.MainFunctionBody() << " let b_value = b_data[i];\n" + " let b_value_lower = unpack4xU8(b_value & 0x0F0F0F0Fu);\n" + " let b_value_upper = unpack4xU8((b_value >> 4) & 0x0F0F0F0Fu);\n" + " let b_quantized_values = mat2x4(output_element_t(b_value_lower[0]), output_element_t(b_value_upper[0]), output_element_t(b_value_lower[1]), output_element_t(b_value_upper[1]), output_element_t(b_value_lower[2]), 
output_element_t(b_value_upper[2]), output_element_t(b_value_lower[3]), output_element_t(b_value_upper[3]));\n" + " let b_dequantized_values = (b_quantized_values - mat2x4(zero_point, zero_point, zero_point, zero_point, zero_point, zero_point, zero_point, zero_point)) * scale;\n"; + for (uint32_t i = 0; i < tile_m_; i++) { + if (i == 0) { + shader.MainFunctionBody() << " var "; + } + shader.MainFunctionBody() << " a0 = subgroupShuffle(a_data" << i << ", i * 2);\n"; + if (i == 0) { + shader.MainFunctionBody() << " var "; + } + shader.MainFunctionBody() << " a1 = subgroupShuffle(a_data" << i << ", i * 2 + 1);\n"; + shader.MainFunctionBody() << " inter_results[" << i << "][in_y][in_x] += dot(a0, b_dequantized_values[0]) + dot(a1, b_dequantized_values[1]);\n"; } - } - shader.MainFunctionBody() << ")) * scale;\n"; - if (tile_m_ == 1) { - switch (a.NumComponents()) { - case 1: - shader.MainFunctionBody() << " inter_results[local_id.y][local_id.x] += dot(vec4(sub_a[word_offset], sub_a[word_offset + 1], sub_a[word_offset + 2], sub_a[word_offset + 3]), b_dequantized_values[0]) + dot(vec4(sub_a[word_offset + 4], sub_a[word_offset + 5], sub_a[word_offset + 6], sub_a[word_offset + 7]), b_dequantized_values[1]);\n"; - break; - case 2: - shader.MainFunctionBody() << " inter_results[local_id.y][local_id.x] += dot(vec4(sub_a[word_offset], sub_a[word_offset + 1]), b_dequantized_values[0]) + dot(vec4(sub_a[word_offset + 2], sub_a[word_offset + 3]), b_dequantized_values[1]);\n"; - break; - case 4: - shader.MainFunctionBody() << " inter_results[local_id.y][local_id.x] += dot(sub_a[word_offset], b_dequantized_values[0]) + dot(sub_a[word_offset + 1], b_dequantized_values[1]);\n"; - break; - default: - break; + shader.MainFunctionBody() << " }\n"; + shader.MainFunctionBody() << " } else if (sg_size == 16u) {\n"; + shader.MainFunctionBody() << " for (var i: u32 = 0; i < 4; i++) {\n"; + shader.MainFunctionBody() << " let b_value = b_data[i];\n" + " let b_value_lower = unpack4xU8(b_value & 0x0F0F0F0Fu);\n" + " let b_value_upper = unpack4xU8((b_value >> 4) & 0x0F0F0F0Fu);\n" + " let b_quantized_values = mat2x4(output_element_t(b_value_lower[0]), output_element_t(b_value_upper[0]), output_element_t(b_value_lower[1]), output_element_t(b_value_upper[1]), output_element_t(b_value_lower[2]), output_element_t(b_value_upper[2]), output_element_t(b_value_lower[3]), output_element_t(b_value_upper[3]));\n" + " let b_dequantized_values = (b_quantized_values - mat2x4(zero_point, zero_point, zero_point, zero_point, zero_point, zero_point, zero_point, zero_point)) * scale;\n"; + for (uint32_t i = 0; i < tile_m_; i++) { + if (i == 0) { + shader.MainFunctionBody() << " var "; + } + shader.MainFunctionBody() << " a0 = subgroupShuffle(a_data" << i << ", i * 2);\n"; + if (i == 0) { + shader.MainFunctionBody() << " var "; + } + shader.MainFunctionBody() << " a00 = subgroupShuffle(a_data" << i << ", i * 2 + 8);\n"; + if (i == 0) { + shader.MainFunctionBody() << " var "; + } + shader.MainFunctionBody() << " a1 = subgroupShuffle(a_data" << i << ", i * 2 + 1);\n"; + if (i == 0) { + shader.MainFunctionBody() << " var "; + } + shader.MainFunctionBody() << " a11 = subgroupShuffle(a_data" << i << ", i * 2 + 9);\n"; + shader.MainFunctionBody() << " inter_results[" << i << "][in_y][in_x] += dot(select(a00, a0, local_idx % 2 == 0), b_dequantized_values[0]) + dot(select(a11, a1, local_idx % 2 == 0), b_dequantized_values[1]);\n"; } - } else { + shader.MainFunctionBody() << " word_offset += " << 8 / a.NumComponents() << ";\n" + << " }\n"; + 
shader.MainFunctionBody() << " } else {\n"; + shader.MainFunctionBody() << " for (var i: u32 = 0; i < 4; i++) {\n"; + shader.MainFunctionBody() << " let b_value = b_data[i];\n" + " let b_value_lower = unpack4xU8(b_value & 0x0F0F0F0Fu);\n" + " let b_value_upper = unpack4xU8((b_value >> 4) & 0x0F0F0F0Fu);\n" + " let b_quantized_values = mat2x4(output_element_t(b_value_lower[0]), output_element_t(b_value_upper[0]), output_element_t(b_value_lower[1]), output_element_t(b_value_upper[1]), output_element_t(b_value_lower[2]), output_element_t(b_value_upper[2]), output_element_t(b_value_lower[3]), output_element_t(b_value_upper[3]));\n" + " let b_dequantized_values = (b_quantized_values - mat2x4(zero_point, zero_point, zero_point, zero_point, zero_point, zero_point, zero_point, zero_point)) * scale;\n"; for (uint32_t i = 0; i < tile_m_; i++) { + if (i == 0) { + shader.MainFunctionBody() << " var "; + } + shader.MainFunctionBody() << " a0 = subgroupShuffle(a_data" << i << ", word_offset);\n"; + if (i == 0) { + shader.MainFunctionBody() << " var "; + } + shader.MainFunctionBody() << " a1 = subgroupShuffle(a_data" << i << ", word_offset + 1);\n"; + shader.MainFunctionBody() << " inter_results[" << i << "][in_y][in_x] += dot(a0, b_dequantized_values[0]) + dot(a1, b_dequantized_values[1]);\n"; + } + shader.MainFunctionBody() << " word_offset += " << 8 / a.NumComponents() << ";\n"; + shader.MainFunctionBody() << " }\n"; + shader.MainFunctionBody() << " }\n"; + + shader.MainFunctionBody() << " }\n"; + shader.MainFunctionBody() << " if (local_idx < " << WorkgroupSizeY() * tile_m_ << ") {\n" + << " let inner_row = local_idx / " << WorkgroupSizeY() << ";\n" + << " let inner_col = local_idx % " << WorkgroupSizeY() << ";\n" + << " var output_value = output_value_t(0);\n" + << " for (var b = 0u; b < " << WorkgroupSizeX() << "; b++) {\n" + << " output_value += inter_results[inner_row][inner_col][b];\n" + " }\n" + " if (row + inner_row < uniforms.output_shape[1] && col + inner_col < uniforms.output_shape[2]) {\n" + << " " << y.SetByIndices("output_indices_t(batch, row + inner_row, col + inner_col)", "output_value") << ";\n" + << " }\n" + " }\n"; + } else { + if (tile_m_ == 1) { + shader.AdditionalImplementation() << "fn mm_readA(batch : u32, row : u32, col : u32) -> input_a_value_t {\n" + " if (col < uniforms.input_a_shape[2]) {\n" + << " return " << a.GetByIndices("input_a_indices_t(batch, row, col)") << ";\n" + << " } else {\n" + " return input_a_value_t(0);\n" + " }\n" + "}\n" + << "var sub_a: array;\n" + << "var inter_results: array, " << WorkgroupSizeY() << ">;\n"; + std::string offset = "workgroup_idx * " + std::to_string(WorkgroupSizeY()); + shader.MainFunctionBody() << " let output_indices = " << y.OffsetToIndices(offset) << ";\n" + << " let col = output_indices[2];\n" + " let row = output_indices[1];\n" + " let batch = output_indices[0];\n"; + } else { + ORT_ENFORCE(tile_m_ < WorkgroupSizeY(), "tile_m must be less than or equal to WorkgroupSizeY."); + ORT_ENFORCE(WorkgroupSizeX() == WorkgroupSizeY(), "WorkgroupSizeX must be equal to WorkgroupSizeY."); + + shader.AdditionalImplementation() << "fn mm_readA(batch : u32, row : u32, col : u32) -> input_a_value_t {\n" + " if (row < uniforms.input_a_shape[1] && col < uniforms.input_a_shape[2]) {\n" + << " return " << a.GetByIndices("input_a_indices_t(batch, row, col)") << ";\n" + << " } else {\n" + " return input_a_value_t(0);\n" + " }\n" + "}\n" + << "var sub_a: array," << tile_m_ << ">;\n" + << "var inter_results: array, " << WorkgroupSizeY() << ">," << 
tile_m_ << ">;\n"; + shader.MainFunctionBody() << " let col = workgroup_id.x * " << WorkgroupSizeY() << ";\n" + << " let row = workgroup_id.y * " << tile_m_ << ";\n" + << " let batch = workgroup_id.z;\n"; + } + shader.MainFunctionBody() << " let n_blocks_per_col = uniforms.input_b_shape[1];\n" + << " let num_tiles = (n_blocks_per_col - 1) / " << blocks_per_tile << " + 1;\n" + // Loop over shared dimension. + << " for (var tile: u32 = 0; tile < num_tiles; tile += 1) {\n" + << " let a_col_start = tile * " << a_length_per_tile << ";\n" + << " // load one tile A data into shared memory.\n" + << " for (var a_offset = local_idx; a_offset < " << a_length_per_tile << "; a_offset += " << workgroup_size << ") {\n" + << " let a_col = a_col_start + a_offset;\n"; + if (tile_m_ == 1) { + shader.MainFunctionBody() << " sub_a[a_offset] = mm_readA(batch, row, a_col);\n"; + } else { + for (uint32_t i = 0; i < tile_m_; i++) { + shader.MainFunctionBody() << " sub_a[" << i << "][a_offset] = mm_readA(batch, row + " << i << ", a_col);\n"; + } + } + shader.MainFunctionBody() << " }\n" + " workgroupBarrier();\n" + // Each thread processes one block. + " let b_row = col + local_id.y;\n" + << " let block = tile * " << blocks_per_tile << " + local_id.x;\n"; + if (has_zero_points_) { + const auto& zero_points = shader.AddInput("zero_points", ShaderUsage::UseUniform); + shader.MainFunctionBody() << " let zero_point_bytes_per_col = (n_blocks_per_col + 1) / 2;\n" + " let zero_point_byte_count = b_row * zero_point_bytes_per_col + (block >> 0x1u);\n" + " let zero_point_word_index = zero_point_byte_count >> 0x2u;\n" + " let zero_point_byte_offset = zero_point_byte_count & 0x3u;\n" + " let zero_point_nibble_offset: u32 = block & 0x1u;\n" + " let zero_point_bits_offset = (zero_point_byte_offset << 3) + (zero_point_nibble_offset << 2);\n" + << " let zero_point_word = " << zero_points.GetByOffset("zero_point_word_index") << " >> zero_point_bits_offset;\n" + << " let zero_point = output_element_t((zero_point_word) & 0xFu);\n"; + } else { + // The default zero point is 8 for unsigned 4-bit quantization. 
+ shader.MainFunctionBody() << " let zero_point = output_element_t(8.0);\n"; + } + shader.MainFunctionBody() << " var scale = output_element_t(0);\n" + " var b_data = input_b_value_t(0);\n" + << " if (block < n_blocks_per_col) {\n" + << " scale = " << scales.GetByOffset("b_row * n_blocks_per_col + block") << ";\n" + << " b_data = " << b.GetByIndices("input_b_indices_t(b_row, block, 0)") << ";\n" + << " }\n" + << " var word_offset = local_id.x * " << block_size_ / a.NumComponents() << ";\n" + << " for (var i: u32 = 0; i < " << components_b_ << "; i++) {\n"; + shader.MainFunctionBody() << " let b_value = b_data"; + if (components_b_ > 1) { + shader.MainFunctionBody() << "[i]"; + } + shader.MainFunctionBody() << ";\n" + " let b_value_lower = unpack4xU8(b_value & 0x0F0F0F0Fu);\n" + " let b_value_upper = unpack4xU8((b_value >> 4) & 0x0F0F0F0Fu);\n" + " let b_quantized_values = mat2x4(output_element_t(b_value_lower[0]), output_element_t(b_value_upper[0]), output_element_t(b_value_lower[1]), output_element_t(b_value_upper[1]), output_element_t(b_value_lower[2]), output_element_t(b_value_upper[2]), output_element_t(b_value_lower[3]), output_element_t(b_value_upper[3]));\n" + " let b_dequantized_values = (b_quantized_values - mat2x4("; + for (int i = 0; i < 8; i++) { + shader.MainFunctionBody() << "zero_point"; + if (i < 7) { + shader.MainFunctionBody() << ", "; + } + } + shader.MainFunctionBody() << ")) * scale;\n"; + if (tile_m_ == 1) { switch (a.NumComponents()) { case 1: - shader.MainFunctionBody() << " inter_results[" << i << "][local_id.y][local_id.x] += dot(vec4(sub_a[" << i << "][word_offset], sub_a[" << i << "][word_offset + 1], sub_a[" << i << "][word_offset + 2], sub_a[" << i << "][word_offset + 3]), b_dequantized_values[0]) + dot(vec4(sub_a[" << i << "][word_offset + 4], sub_a[" << i << "][word_offset + 5], sub_a[" << i << "][word_offset + 6], sub_a[" << i << "][word_offset + 7]), b_dequantized_values[1]);\n"; + shader.MainFunctionBody() << " inter_results[local_id.y][local_id.x] += dot(vec4(sub_a[word_offset], sub_a[word_offset + 1], sub_a[word_offset + 2], sub_a[word_offset + 3]), b_dequantized_values[0]) + dot(vec4(sub_a[word_offset + 4], sub_a[word_offset + 5], sub_a[word_offset + 6], sub_a[word_offset + 7]), b_dequantized_values[1]);\n"; break; case 2: - shader.MainFunctionBody() << " inter_results[" << i << "][local_id.y][local_id.x] += dot(vec4(sub_a[" << i << "][word_offset], sub_a[" << i << "][word_offset + 1]), b_dequantized_values[0]) + dot(vec4(sub_a[" << i << "][word_offset + 2], sub_a[" << i << "][word_offset + 3]), b_dequantized_values[1]);\n"; + shader.MainFunctionBody() << " inter_results[local_id.y][local_id.x] += dot(vec4(sub_a[word_offset], sub_a[word_offset + 1]), b_dequantized_values[0]) + dot(vec4(sub_a[word_offset + 2], sub_a[word_offset + 3]), b_dequantized_values[1]);\n"; break; case 4: - shader.MainFunctionBody() << " inter_results[" << i << "][local_id.y][local_id.x] += dot(sub_a[" << i << "][word_offset], b_dequantized_values[0]) + dot(sub_a[" << i << "][word_offset + 1], b_dequantized_values[1]);\n"; + shader.MainFunctionBody() << " inter_results[local_id.y][local_id.x] += dot(sub_a[word_offset], b_dequantized_values[0]) + dot(sub_a[word_offset + 1], b_dequantized_values[1]);\n"; break; default: break; } + } else { + for (uint32_t i = 0; i < tile_m_; i++) { + switch (a.NumComponents()) { + case 1: + shader.MainFunctionBody() << " inter_results[" << i << "][local_id.y][local_id.x] += dot(vec4(sub_a[" << i << "][word_offset], sub_a[" << i << "][word_offset + 
1], sub_a[" << i << "][word_offset + 2], sub_a[" << i << "][word_offset + 3]), b_dequantized_values[0]) + dot(vec4(sub_a[" << i << "][word_offset + 4], sub_a[" << i << "][word_offset + 5], sub_a[" << i << "][word_offset + 6], sub_a[" << i << "][word_offset + 7]), b_dequantized_values[1]);\n"; + break; + case 2: + shader.MainFunctionBody() << " inter_results[" << i << "][local_id.y][local_id.x] += dot(vec4(sub_a[" << i << "][word_offset], sub_a[" << i << "][word_offset + 1]), b_dequantized_values[0]) + dot(vec4(sub_a[" << i << "][word_offset + 2], sub_a[" << i << "][word_offset + 3]), b_dequantized_values[1]);\n"; + break; + case 4: + shader.MainFunctionBody() << " inter_results[" << i << "][local_id.y][local_id.x] += dot(sub_a[" << i << "][word_offset], b_dequantized_values[0]) + dot(sub_a[" << i << "][word_offset + 1], b_dequantized_values[1]);\n"; + break; + default: + break; + } + } } - } - shader.MainFunctionBody() << " word_offset += " << 8 / a.NumComponents() << ";\n" - << " }\n" - " workgroupBarrier();\n" - " }\n"; - if (tile_m_ == 1) { - shader.MainFunctionBody() << " if (local_idx < " << WorkgroupSizeY() << ") {\n" - << " var output_value = output_value_t(0);\n" - << " for (var b = 0u; b < " << WorkgroupSizeX() << "; b++) {\n" - << " output_value += inter_results[local_idx][b];\n" - " }\n" - " if (col + local_idx < uniforms.output_shape[2]) {\n" - << " " << y.SetByIndices("output_indices_t(batch, row, col + local_idx)", "output_value") << ";\n" - << " }\n" - " }\n"; - } else { - shader.MainFunctionBody() << " if (local_id.y < " << tile_m_ << ") {\n" - << " var output_value = output_value_t(0);\n" - << " for (var b = 0u; b < " << WorkgroupSizeX() << "; b++) {\n" - << " output_value += inter_results[local_id.y][local_id.x][b];\n" - " }\n" - " if (row + local_id.y < uniforms.output_shape[1] && col + local_id.x < uniforms.output_shape[2]) {\n" - << " " << y.SetByIndices("output_indices_t(batch, row + local_id.y, col + local_id.x)", "output_value") << ";\n" + shader.MainFunctionBody() << " word_offset += " << 8 / a.NumComponents() << ";\n" << " }\n" + " workgroupBarrier();\n" " }\n"; + if (tile_m_ == 1) { + shader.MainFunctionBody() << " if (local_idx < " << WorkgroupSizeY() << ") {\n" + << " var output_value = output_value_t(0);\n" + << " for (var b = 0u; b < " << WorkgroupSizeX() << "; b++) {\n" + << " output_value += inter_results[local_idx][b];\n" + " }\n" + " if (col + local_idx < uniforms.output_shape[2]) {\n" + << " " << y.SetByIndices("output_indices_t(batch, row, col + local_idx)", "output_value") << ";\n" + << " }\n" + " }\n"; + } else { + shader.MainFunctionBody() << " if (local_id.y < " << tile_m_ << ") {\n" + << " var output_value = output_value_t(0);\n" + << " for (var b = 0u; b < " << WorkgroupSizeX() << "; b++) {\n" + << " output_value += inter_results[local_id.y][local_id.x][b];\n" + " }\n" + " if (row + local_id.y < uniforms.output_shape[1] && col + local_id.x < uniforms.output_shape[2]) {\n" + << " " << y.SetByIndices("output_indices_t(batch, row + local_id.y, col + local_id.x)", "output_value") << ";\n" + << " }\n" + " }\n"; + } } } else { const std::string quantized_data_type = QuantizedDataType(a.NumComponents()); @@ -374,6 +530,222 @@ Status MatMulNBitsProgram::GenerateShaderCode(ShaderHelper& shader) const { return Status::OK(); } +Status DP4AMatMulQuantizeProgram::GenerateShaderCode(ShaderHelper& shader) const { + shader.AddInput("input_a", ShaderUsage::UseUniform | ShaderUsage::UseIndicesTypeAlias | ShaderUsage::UseValueTypeAlias | 
ShaderUsage::UseElementTypeAlias);
+  shader.AddOutput("output", ShaderUsage::UseUniform);
+  shader.AddOutput("scales", ShaderUsage::UseUniform);
+
+  shader.AdditionalImplementation() << R"ADDNL_FN(
+  var max_values : array;
+  )ADDNL_FN";
+
+  shader.MainFunctionBody() << R"MAIN_FN(
+  var local_a = input_a[global_idx];
+  var max_val = subgroupMax(abs(local_a));
+  var max_temp = max(max_val.xy, max_val.zw);
+  var scale = max(max_temp[0], max_temp[1]);
+  if (local_idx % sg_size == 0) {
+    max_values[local_idx / sg_size] = scale;
+  }
+  workgroupBarrier();
+
+  if (sg_size == 8)
+  {
+    scale = max(max_values[0], max_values[1]);
+    scale = max(scale, max_values[2]);
+    scale = max(scale, max_values[3]);
+  }
+  else if (sg_size == 16)
+  {
+    scale = max(max_values[0], max_values[1]);
+  }
+  else
+  {
+    scale = max_values[0];
+  }
+
+  var norm_a = local_a/scale;
+  output[global_idx] = pack4x8snorm(vec4(norm_a));
+  if (local_idx == 0)
+  {
+    // 127 is the max value of signed int8 [-127,127] used by pack4x8snorm for 1.0f.
+    scales[workgroup_idx] = scale/127;
+  }
+)MAIN_FN";
+  return Status::OK();
+}
+
+Status DP4AMatMulNBitsProgram::GenerateShaderCode(ShaderHelper& shader) const {
+  shader.AddInput("input_a", ShaderUsage::UseUniform | ShaderUsage::UseIndicesTypeAlias | ShaderUsage::UseValueTypeAlias);
+  shader.AddInput("scales_a", ShaderUsage::UseUniform);
+  shader.AddInput("input_b", ShaderUsage::UseUniform);
+  shader.AddInput("scales_b", ShaderUsage::UseUniform);
+  shader.AddOutput("output", ShaderUsage::UseUniform | ShaderUsage::UseElementTypeAlias);
+
+  // This shader implements co-operative matrix multiply. The key idea here is to
+  // assume there is a primitive for a medium-size matrix multiply that a subgroup
+  // can perform, using all its lanes and pooling all their registers to keep the
+  // values resident.
+  //
+  // The entire workgroup, which has N subgroups, first loads a tile into shared memory.
+  // Then each subgroup loads a subtile from shared memory into registers and uses
+  // the medium-size matrix multiply primitive to perform the math.
+  // The values for tile/subtile size are chosen to conform to the resource limits
+  // of an Alderlake/Tigerlake GPU. A tile is 64x64, the workgroup is 256 threads -
+  // therefore there are 16 subgroups and 16 lanes in each subgroup.
+  // K, the hidden dimension, is paged in from RAM at a K tile size of 64.
+  // All this puts the shared memory requirement slightly above 16KB.
+  // The WebGPU limit is 16KB, so the output is kept in registers instead of SHM to
+  // make everything fit in shared memory.
+  //
+  // Each subgroup performs a 16 x 64 x 16 multiply, which is implemented with
+  // subgroup shuffle as a placeholder for the day the medium-size matrix multiply
+  // primitive becomes available in WGSL. The register requirement is ~2KB per
+  // subgroup; on Alderlake/Tigerlake a subgroup has 8KB of register space, pooling
+  // the 512B of registers from each lane.
+  //
+  // The medium-size matmul is implemented using dot4I8Packed, so the inputs for
+  // this shader require A to be int8 quantized with block size 64. B is regular
+  // matmulnbits input with block size 32.
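A model of the quantization scheme DP4AMatMulQuantizeProgram implements above: each 128-element block of A (one 32-thread workgroup times vec4) is scaled by its max absolute value, snorm-quantized to int8 (pack4x8snorm rounds clamp(v, -1, 1) * 127), and scale/127 is stored so that dequantization is a single multiply. A NumPy sketch of the round trip (helper name is ours; like the shader, it assumes the block is not all zeros):

import numpy as np

def quantize_a_block(block):
    scale = np.abs(block).max()
    q = np.rint(np.clip(block / scale, -1.0, 1.0) * 127.0).astype(np.int8)
    return q, scale / 127.0  # the value written to `scales`

a = np.random.default_rng(1).standard_normal(128).astype(np.float32)
q, s = quantize_a_block(a)
print(np.abs(q.astype(np.float32) * s - a).max())  # worst-case error ~ s / 2

The matmul shader that follows then multiplies the int32 dot4I8Packed accumulator by scale_A * scale_B to recover the float output.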
+
+  shader.AdditionalImplementation() << R"ADDNL_FN(
+  const tile_size = 64;
+  const subtile_size = 16;
+  const tile_size_k = 32;
+  const vec_factor = 4;
+  const u32_factor = 4;
+  const tile_size_k_vec = 4;
+  const block_size = 32;
+
+  // Shared memory
+  var tile_A : array, tile_size_k_vec>, tile_size>;  // 64 x 32
+  var scale_A : array;  // 64 x 1
+  var tile_B : array, tile_size_k_vec>, tile_size>;  // 64 x 32
+  var scale_B : array;  // 64 x 1
+
+  // Private memory
+  var lane_output: array;
+
+  fn loadSHMA(a_global_base:u32, kidx_v:u32, row: u32, col: u32)
+  {
+    let a_global = a_global_base + row;
+    if (a_global >= uniforms.M)
+    {
+      return;
+    }
+    tile_A[row][col] = input_a[a_global*uniforms.K8+kidx_v+col];
+    if (col == 0)
+    {
+      // kidx_v covers 8 values of k
+      scale_A[row] = scales_a[a_global*(uniforms.K/128) + kidx_v/16];
+    }
+  }
+
+  fn loadSHMB(b_global_base:u32, kidx_v:u32, row: u32, col: u32)
+  {
+    let b_global = b_global_base + row;
+    if (b_global >= uniforms.N)
+    {
+      return;
+    }
+
+    let b_value = input_b[b_global*uniforms.K8+kidx_v+col];
+    var b_value_lower = vec4(unpack4xU8(b_value & 0x0F0F0F0Fu)) - vec4(8);
+    var b_value_upper = vec4(unpack4xU8((b_value >> 4) & 0x0F0F0F0Fu)) - vec4(8);
+    tile_B[row][col][0] = pack4xI8(vec4(b_value_lower[0], b_value_upper[0], b_value_lower[1], b_value_upper[1]));
+    tile_B[row][col][1] = pack4xI8(vec4(b_value_lower[2], b_value_upper[2], b_value_lower[3], b_value_upper[3]));
+    if (col == 0)
+    {
+      // each kidx_v covers 8 values of k
+      scale_B[row] = scales_b[b_global*(uniforms.K/32) + kidx_v/4];
+    }
+  }
+
+  fn DP4AI(a:vec4, b:vec4) -> i32
+  {
+    var local_sum = dot4I8Packed(a[0], b[0]);
+    local_sum += dot4I8Packed(a[1], b[1]);
+    local_sum += dot4I8Packed(a[2], b[2]);
+    local_sum += dot4I8Packed(a[3], b[3]);
+    return local_sum;
+  }
+
+)ADDNL_FN";
+
+  shader.MainFunctionBody() << R"MAIN_FN(
+  // During the load phase we use all 256 threads to load 64 rows of A/B.
+  // For each row we load 4 vectorized elements, which are 32 elements of K.
+  let a_global_base = workgroup_id.x * tile_size;
+  let b_global_base = workgroup_id.y * tile_size;
+  let load_row = u32(local_idx/4);
+  let load_col = u32(local_idx%4);
+
+  // During the compute phase, we have the 64x64 tile split into
+  // subtiles of 16x16. We have a grid of 4x4 subtiles.
+  let subtile_id = u32(local_idx / subtile_size);
+  let subtile_idx = u32(subtile_id / 4);
+  let subtile_idy = u32(subtile_id % 4);
+  let base_A = subtile_idx * 16;
+  let base_B = subtile_idy * 16;
+  // For each subtile we have 16 threads assigned.
+  let a_idx = u32(local_idx % subtile_size);
+
+  // K's vectorization is 8 items per index. See input_a/input_b.
+  // tile_size_k_vec is the k tile size in vectorized k units/space (1/8).
+ for (var kidx_v:u32 = 0; kidx_v < uniforms.K8; kidx_v+=tile_size_k_vec) + { + // Populate shared memory for the workgroup + loadSHMA(a_global_base, kidx_v, load_row, load_col); + loadSHMB(b_global_base, kidx_v, load_row, load_col); + workgroupBarrier(); + + var own_a0: vec4 = vec4(tile_A[base_A + a_idx][0], tile_A[base_A + a_idx][1]); + var own_a1: vec4 = vec4(tile_A[base_A + a_idx][2], tile_A[base_A + a_idx][3]); + var own_scale_a = scale_A[base_A + a_idx]; + if (sg_size == 16) + { + var own_b0: vec4 = vec4(tile_B[base_B + sg_id][0], tile_B[base_B + sg_id][1]); + var own_b1: vec4 = vec4(tile_B[base_B + sg_id][2], tile_B[base_B + sg_id][3]); + var own_scale_b = scale_B[base_B + sg_id]; + for (var col:u32 = 0; col < 16; col++) + { + var local_scale_b = subgroupShuffle(own_scale_b, col); + local_scale_b = local_scale_b * own_scale_a; + var local_sum = DP4AI(own_a0, subgroupShuffle(own_b0, col)); + local_sum += DP4AI(own_a1, subgroupShuffle(own_b1, col)); + lane_output[col] += (output_element_t(local_sum) * local_scale_b); + } + } + else + { + for (var col:u32 = 0; col < 16; col++) + { + var b0: vec4 = vec4(tile_B[base_B + col][0], tile_B[base_B + col][1]); + var b1: vec4 = vec4(tile_B[base_B + col][2], tile_B[base_B + col][3]); + var local_sum = DP4AI(own_a0, b0); + local_sum += DP4AI(own_a1, b1); + lane_output[col] += (output_element_t(local_sum) * own_scale_a * scale_B[base_B + col]); + } + } + workgroupBarrier(); + } + + let a_global = a_global_base + base_A + a_idx; + let b_global = b_global_base + base_B; + let output_idx = ((a_global) * uniforms.N + b_global)/4; + // This creates a shader requirement that uniforms.N % 16 == 0 + if (a_global < uniforms.M && b_global < uniforms.N) + { + for (var i:u32 = 0; i < 4; i++) + { + let lidx = i * 4; + output[output_idx+i] = vec4(lane_output[lidx], lane_output[lidx+1] , lane_output[lidx+2], lane_output[lidx+3]); + } + } +)MAIN_FN"; + + return Status::OK(); +} + Status MatMulNBits::ComputeInternal(onnxruntime::webgpu::ComputeContext& context) const { const Tensor* a = context.Input(0); const Tensor* b = context.Input(1); @@ -409,11 +781,55 @@ Status MatMulNBits::ComputeInternal(onnxruntime::webgpu::ComputeContext& context uint32_t components = GetMaxComponents(N); const bool has_zero_points = zero_points != nullptr; + const bool has_subgroup = context.Device().HasFeature(wgpu::FeatureName::Subgroups); + // macOS - Avoid using dp4a on Metal, as it does not appear to have native dp4a support. 
+  // macOS - Avoid using dp4a on Metal, as it does not appear to have native dp4a support.
+  // https://github.com/gpuweb/gpuweb/issues/2677#issuecomment-1713292226
+  const bool use_dp4a = has_subgroup && context.AdapterInfo().backendType != wgpu::BackendType::Metal;
+  if (accuracy_level_ == 4 && block_size == 32 &&
+      batch_count == 1 && components_a == 4 && K % 64 == 0 && N % 16 == 0 &&
+      !has_zero_points && use_dp4a && M >= kMinMForTileOptimization) {
+    constexpr uint32_t kVec4Components = 4;
+    constexpr uint32_t kVec2Components = 2;
+    constexpr uint32_t kU32Components = 4;
+
+    constexpr uint32_t kBlockSizeA = 128;
+    DP4AMatMulQuantizeProgram quantize_program;
+    quantize_program.SetWorkgroupSize(32);
+    quantize_program.SetDispatchGroupSize(M * K / kBlockSizeA, 1, 1);
+    TensorShape a_quant_shape{1, M, K / kU32Components};
+    Tensor a_quant = context.CreateGPUTensor(DataTypeImpl::GetType<uint32_t>(), a_quant_shape);
+    TensorShapeVector a_scales_dims({1, 1, M, K / kBlockSizeA});
+    Tensor a_scale = context.CreateGPUTensor(a->DataType(), a_scales_dims);
+    quantize_program.AddInputs({{a, ProgramTensorMetadataDependency::TypeAndRank, gsl::narrow<int>(kVec4Components)}})
+        .AddOutputs({{&a_quant, ProgramTensorMetadataDependency::Rank, a_quant.Shape(), gsl::narrow<int>(1)},
+                     {&a_scale, ProgramTensorMetadataDependency::Rank, a_scale.Shape(), gsl::narrow<int>(1)}});
+    ORT_RETURN_IF_ERROR(context.RunProgram(quantize_program));
+
+    constexpr uint32_t kTileSize = 64;
+    TensorShape reshaped_y_shape{1, M, N / kVec4Components};
+    DP4AMatMulNBitsProgram mul_program;
+    mul_program.SetWorkgroupSize(256);
+    mul_program.SetDispatchGroupSize(
+        (M + kTileSize - 1) / kTileSize,
+        (N + kTileSize - 1) / kTileSize, 1);
+    mul_program.AddInputs({{&a_quant, ProgramTensorMetadataDependency::TypeAndRank, gsl::narrow<int>(kVec2Components)},
+                           {&a_scale, ProgramTensorMetadataDependency::TypeAndRank, gsl::narrow<int>(1)},
+                           {b, ProgramTensorMetadataDependency::TypeAndRank, gsl::narrow<int>(kU32Components)},
+                           {scales, ProgramTensorMetadataDependency::TypeAndRank, gsl::narrow<int>(1)}})
+        .AddUniformVariables({{static_cast<uint32_t>(M)},
+                              {static_cast<uint32_t>(N)},
+                              {static_cast<uint32_t>(K)},
+                              {static_cast<uint32_t>(K / 8)},
+                              {static_cast<uint32_t>(K / 16)}})
+        .AddOutput({y, ProgramTensorMetadataDependency::TypeAndRank, reshaped_y_shape, gsl::narrow<int>(kVec4Components)});
+    return context.RunProgram(mul_program);
+  }
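A worked example of the dispatch arithmetic above, using hypothetical shapes that satisfy the gates (`K % 64 == 0`, `N % 16 == 0`); the constants are the ones from the patch:

```cpp
#include <cstdint>
#include <cstdio>

int main() {
  // Hypothetical shapes for illustration.
  constexpr uint32_t M = 512, N = 4096, K = 4096;
  constexpr uint32_t kBlockSizeA = 128, kTileSize = 64;

  // DP4AMatMulQuantizeProgram: one 32-thread workgroup per 128-element block of A.
  const uint32_t quantize_groups = M * K / kBlockSizeA;      // 16384 workgroups

  // DP4AMatMulNBitsProgram: one 256-thread workgroup per 64x64 output tile.
  const uint32_t tiles_m = (M + kTileSize - 1) / kTileSize;  // 8
  const uint32_t tiles_n = (N + kTileSize - 1) / kTileSize;  // 64
  std::printf("quantize: %u groups, matmul: %u x %u tiles\n",
              quantize_groups, tiles_m, tiles_n);
  return 0;
}
```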
 
   // TODO: Support output_number > 1. Some cases are failed when output_number > 1.
   constexpr uint32_t output_number = 1;
   const uint32_t tile_m = M > kMinMForTileOptimization ? 4 : 1;
-  MatMulNBitsProgram program{output_number, block_size, tile_m, gsl::narrow<int>(components_b), has_zero_points};
+  const bool use_subgroup = has_subgroup && context.AdapterInfo().vendor == std::string_view{"intel"} && components_a == 4 && block_size == 32;
+  MatMulNBitsProgram program{output_number, block_size, tile_m, gsl::narrow<int>(components_b), has_zero_points, use_subgroup};
   if (M > kMinMForTileOptimization && block_size == 32) {
     components = 1;
     constexpr uint32_t workgroup_size = 64;
@@ -423,12 +839,11 @@ Status MatMulNBits::ComputeInternal(onnxruntime::webgpu::ComputeContext& context
     program.SetDispatchGroupSize((N + workgroup_y - 1) / workgroup_y,
                                  (M + tile_m - 1) / tile_m,
                                  batch_count);
-    program.CacheHint("T_M" + std::to_string(tile_m));
+    program.CacheHint("T_M" + std::to_string(tile_m) + "Subgroup" + std::to_string(use_subgroup));
   } else if (block_size == 32) {
     components = 1;
-    constexpr uint32_t workgroup_size = 128;
-    const uint32_t workgroup_y = N % 8 == 0 ? 8 : N % 4 == 0 ? 4
-                                                             : 1;
+    constexpr uint32_t workgroup_size = 64;
+    const uint32_t workgroup_y = N % 8 == 0 ? 8 : 1;
     const uint32_t workgroup_x = workgroup_size / workgroup_y;
     program.SetWorkgroupSize(workgroup_x, workgroup_y, 1);
     program.SetDispatchGroupSize(data_size / components / workgroup_y);
diff --git a/onnxruntime/contrib_ops/webgpu/quantization/matmul_nbits.h b/onnxruntime/contrib_ops/webgpu/quantization/matmul_nbits.h
index 57615d3ddabcf..a2470d9268907 100644
--- a/onnxruntime/contrib_ops/webgpu/quantization/matmul_nbits.h
+++ b/onnxruntime/contrib_ops/webgpu/quantization/matmul_nbits.h
@@ -14,12 +14,13 @@ using namespace onnxruntime::webgpu;
 
 class MatMulNBitsProgram final : public Program<MatMulNBitsProgram> {
  public:
-  MatMulNBitsProgram(uint32_t output_number, uint32_t block_size, uint32_t tile_m, int components_b, bool has_zero_points) : Program{"MatMulNBits"},
-                                                                                                                             output_number_{output_number},
-                                                                                                                             block_size_{block_size},
-                                                                                                                             tile_m_{tile_m},
-                                                                                                                             components_b_{components_b},
-                                                                                                                             has_zero_points_{has_zero_points} {
+  MatMulNBitsProgram(uint32_t output_number, uint32_t block_size, uint32_t tile_m, int components_b, bool has_zero_points, bool use_subgroup) : Program{"MatMulNBits"},
+                                                                                                                                                output_number_{output_number},
+                                                                                                                                                block_size_{block_size},
+                                                                                                                                                tile_m_{tile_m},
+                                                                                                                                                components_b_{components_b},
+                                                                                                                                                has_zero_points_{has_zero_points},
+                                                                                                                                                use_subgroup_(use_subgroup) {
   }
 
   Status GenerateShaderCode(ShaderHelper& sh) const override;
@@ -31,6 +32,25 @@ class MatMulNBitsProgram final : public Program<MatMulNBitsProgram> {
   uint32_t tile_m_;
   int components_b_;
   bool has_zero_points_;
+  bool use_subgroup_;
+};
+
+class DP4AMatMulQuantizeProgram final : public Program<DP4AMatMulQuantizeProgram> {
+ public:
+  DP4AMatMulQuantizeProgram() : Program{"DP4AMatMulQuantize"} {}
+  Status GenerateShaderCode(ShaderHelper& sh) const override;
+};
+
+class DP4AMatMulNBitsProgram final : public Program<DP4AMatMulNBitsProgram> {
+ public:
+  DP4AMatMulNBitsProgram() : Program{"DP4AMatMulNBits"} {}
+  Status GenerateShaderCode(ShaderHelper& sh) const override;
+  WEBGPU_PROGRAM_DEFINE_UNIFORM_VARIABLES(
+      {"M", ProgramUniformVariableDataType::Uint32},
+      {"N", ProgramUniformVariableDataType::Uint32},
+      {"K", ProgramUniformVariableDataType::Uint32},
+      {"K8", ProgramUniformVariableDataType::Uint32},
+      {"K16", ProgramUniformVariableDataType::Uint32});
 };
 
 class MatMulNBits final : public WebGpuKernel {
@@ -40,6 +60,7 @@ class MatMulNBits final : public WebGpuKernel {
     N_ = info.GetAttr<int64_t>("N");
     block_size_ = info.GetAttr<int64_t>("block_size");
     int64_t bits = info.GetAttr<int64_t>("bits");
+    accuracy_level_ = info.GetAttrOrDefault<int64_t>("accuracy_level", 4);
     ORT_ENFORCE(bits == 4,
                 "Only 4b quantization is supported for MatMulNBits op, additional bits support is planned.");
   }
@@ -50,6 +71,7 @@ class MatMulNBits final : public WebGpuKernel {
   int64_t K_;
   int64_t N_;
   int64_t block_size_;
+  int64_t accuracy_level_;
 };
 
 }  // namespace webgpu
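The next hunk teaches `OrtApis::CreateMemoryInfo` about QNN HTP shared memory. A sketch of how a caller reaches the new branch through the C API (the allocator name string is an assumption, since the constant's value is defined outside this diff, and error handling is elided):

```cpp
#include <onnxruntime_c_api.h>

// Exercises the new QNN_HTP_SHARED branch of CreateMemoryInfo.
// "QnnHtpShared" is assumed to be the string onnxruntime::QNN_HTP_SHARED resolves to.
void CreateQnnHtpSharedMemoryInfo(const OrtApi* api) {
  OrtMemoryInfo* info = nullptr;
  api->CreateMemoryInfo("QnnHtpShared", OrtDeviceAllocator, /*id*/ 0,
                        OrtMemTypeDefault, &info);
  api->ReleaseMemoryInfo(info);
}
```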
 
diff --git a/onnxruntime/core/framework/allocator.cc b/onnxruntime/core/framework/allocator.cc
index 26b98b0a04d24..02dbb3e518783 100644
--- a/onnxruntime/core/framework/allocator.cc
+++ b/onnxruntime/core/framework/allocator.cc
@@ -155,11 +155,18 @@ ORT_API_STATUS_IMPL(OrtApis::CreateMemoryInfo, _In_ const char* name1, enum OrtA
                             mem_type1);
   } else if (strcmp(name1, onnxruntime::CUDA_PINNED) == 0) {
     *out = new OrtMemoryInfo(
-        onnxruntime::CUDA_PINNED, type, OrtDevice(OrtDevice::CPU, OrtDevice::MemType::CUDA_PINNED, static_cast<OrtDevice::DeviceId>(id1)),
+        onnxruntime::CUDA_PINNED, type,
+        OrtDevice(OrtDevice::CPU, OrtDevice::MemType::CUDA_PINNED, static_cast<OrtDevice::DeviceId>(id1)),
         id1, mem_type1);
   } else if (strcmp(name1, onnxruntime::HIP_PINNED) == 0) {
     *out = new OrtMemoryInfo(
-        onnxruntime::HIP_PINNED, type, OrtDevice(OrtDevice::CPU, OrtDevice::MemType::HIP_PINNED, static_cast<OrtDevice::DeviceId>(id1)),
+        onnxruntime::HIP_PINNED, type,
+        OrtDevice(OrtDevice::CPU, OrtDevice::MemType::HIP_PINNED, static_cast<OrtDevice::DeviceId>(id1)),
+        id1, mem_type1);
+  } else if (strcmp(name1, onnxruntime::QNN_HTP_SHARED) == 0) {
+    *out = new OrtMemoryInfo(
+        onnxruntime::QNN_HTP_SHARED, type,
+        OrtDevice(OrtDevice::CPU, OrtDevice::MemType::QNN_HTP_SHARED, static_cast<OrtDevice::DeviceId>(id1)),
         id1, mem_type1);
   } else {
     return OrtApis::CreateStatus(ORT_INVALID_ARGUMENT, "Specified device is not supported.");
diff --git a/onnxruntime/core/framework/debug_node_inputs_outputs_utils.cc b/onnxruntime/core/framework/debug_node_inputs_outputs_utils.cc
index 607969cd4cdc4..7bd825a9b0bb1 100644
--- a/onnxruntime/core/framework/debug_node_inputs_outputs_utils.cc
+++ b/onnxruntime/core/framework/debug_node_inputs_outputs_utils.cc
@@ -8,7 +8,10 @@
 #include "core/framework/print_tensor_statistics_utils.h"
 #include
 #include
+#include
 #include
+#include
+#include
 
 #ifdef DEBUG_NODE_INPUTS_OUTPUTS_ENABLE_DUMP_TO_SQLDB
 #include
@@ -22,6 +25,73 @@ namespace onnxruntime {
 namespace utils {
 
+void NodeDumpAnalysis::Add(const std::string& node_name, const std::string& op_type, bool is_half_overflow) {
+  std::lock_guard<std::mutex> lock(set_mutex);
+  if (is_half_overflow) {
+    auto p = half_overflow_nodes.insert(node_name);
+    if (p.second) {  // insert succeeded
+      ++half_overflow_ops[op_type];
+    }
+  }
+
+  counter++;
+}
+
+void NodeDumpAnalysis::PrintToStdOut(const std::string& model_path) {
+  std::lock_guard<std::mutex> lock(set_mutex);
+  if (counter == 0) {
+    return;
+  }
+
+  // We added counter twice per node (once for node inputs, once for node outputs), so we need to divide it by 2.
+  counter /= 2;
+
+  std::cout << "Total counter in node dumping: " << counter << std::endl;
+
+  if (!half_overflow_nodes.empty()) {
+    std::cout << "Found " << half_overflow_nodes.size() << " nodes that cannot be converted to half precision due to potential input/output overflow." << std::endl;
+
+    if (half_overflow_nodes.count("") > 0) {
+      std::cout << "Warning: some node name is empty and node_block_list is not complete. "
+                << "Please update the model to make sure each node has a name, then run this tool again!" << std::endl;
+    }
+
+    // Sort and display the op frequency in descending order.
+    std::cout << "Operator frequencies for these nodes:" << std::endl;
+    std::vector<std::pair<std::string, int>> op_freq(half_overflow_ops.begin(), half_overflow_ops.end());
+    std::sort(op_freq.begin(), op_freq.end(),
+              [](const std::pair<std::string, int>& a, const std::pair<std::string, int>& b) {
+                return b.second < a.second;
+              });
+    for (const auto& pair : op_freq) {
+      std::cout << pair.first << " : " << pair.second << std::endl;
+    }
+  } else {
+    std::cout << "No node has potential overflow during half conversion, so node_block_list is empty."
<< std::endl; + } + + std::cout << "# -------" << std::endl; + std::cout << "# Example python script for float16 conversion" << std::endl; + std::cout << "# For details, search `node_block_list` in https://github.com/microsoft/onnxruntime/blob/main/onnxruntime/python/tools/transformers/float16.py" << std::endl; + std::cout << "# -------" << std::endl; + std::cout << "from onnxruntime.transformers.onnx_model import OnnxModel" << std::endl; + std::cout << "m = OnnxModel(onnx.load('" << model_path << "'))" << std::endl; + if (!half_overflow_nodes.empty()) { + std::cout << "node_block_list = [" << std::endl; + for (const auto& node : half_overflow_nodes) { + if (!node.empty()) { + std::cout << " '" << node << "'," << std::endl; + } + } + std::cout << "]" << std::endl; + std::cout << "m.convert_float_to_float16(keep_io_types=False, node_block_list=node_block_list)" << std::endl; + } else { + std::cout << "m.convert_float_to_float16(keep_io_types=False)" << std::endl; + } + + std::cout << "m.save_model_to_file('fp16/optimized.onnx', use_external_data_format=False)" << std::endl; +} + namespace { struct TensorMetadata { @@ -59,10 +129,13 @@ bool FilterNode(const NodeDumpOptions& dump_options, const Node& node) { } template -void DumpTensorToStdOut(const Tensor& tensor, const NodeDumpOptions& dump_options) { - onnxruntime::utils::PrintCpuTensor(tensor, dump_options.snippet_threshold, dump_options.snippet_edge_items); - if (dump_options.dump_flags & NodeDumpOptions::DumpFlags::StatisticsData) { - onnxruntime::utils::PrintCpuTensorStats(tensor); +void DumpTensorToStdOut(const Tensor& tensor, const NodeDumpOptions& dump_options, TensorStatisticsData& tensor_statistics) { + if ((dump_options.dump_flags & NodeDumpOptions::DumpFlags::InputData) != 0) { + onnxruntime::utils::PrintCpuTensor(tensor, dump_options.snippet_threshold, dump_options.snippet_edge_items); + } + + if ((dump_options.dump_flags & NodeDumpOptions::DumpFlags::StatisticsData) != 0) { + onnxruntime::utils::PrintCpuTensorStats(tensor, tensor_statistics); } } @@ -295,10 +368,10 @@ void InsertNodePlacementToSqliteDb(const NodeDumpContext& dump_context, const No void DumpCpuTensor( const NodeDumpOptions& dump_options, - const Tensor& tensor, const TensorMetadata& tensor_metadata) { + const Tensor& tensor, const TensorMetadata& tensor_metadata, TensorStatisticsData& tensor_statistics) { switch (dump_options.data_destination) { case NodeDumpOptions::DataDestination::StdOut: { - DispatchOnTensorType(tensor.DataType(), DumpTensorToStdOut, tensor, dump_options); + DispatchOnTensorType(tensor.DataType(), DumpTensorToStdOut, tensor, dump_options, tensor_statistics); break; } case NodeDumpOptions::DataDestination::TensorProtoFiles: { @@ -321,7 +394,7 @@ void DumpCpuTensor( void DumpTensor( const NodeDumpOptions& dump_options, - const Tensor& tensor, TensorMetadata& tensor_metadata, + const Tensor& tensor, TensorMetadata& tensor_metadata, TensorStatisticsData& tensor_statistics, const SessionState& session_state) { // check tensor is on CPU before dumping it auto& tensor_location = tensor.Location(); @@ -329,7 +402,7 @@ void DumpTensor( tensor_location.mem_type == OrtMemTypeCPUInput || tensor_location.mem_type == OrtMemTypeCPUOutput) { tensor_metadata.device_type = "CPU"; - DumpCpuTensor(dump_options, tensor, tensor_metadata); + DumpCpuTensor(dump_options, tensor, tensor_metadata, tensor_statistics); } else { std::cout << tensor_location << "\n"; @@ -345,7 +418,7 @@ void DumpTensor( auto status = data_transfer_mgr.CopyTensor(tensor, cpu_tensor); if 
(status == common::Status::OK()) { tensor_metadata.device_type = "GPU"; - DumpCpuTensor(dump_options, cpu_tensor, tensor_metadata); + DumpCpuTensor(dump_options, cpu_tensor, tensor_metadata, tensor_statistics); } else { std::cout << " failed to transfer data to cpu.\n"; } @@ -383,6 +456,11 @@ const NodeDumpOptions& NodeDumpOptionsFromEnvironmentVariables() { if (ParseEnvironmentVariableWithDefault(env_vars::kDumpStatisticsData, false)) { opts.dump_flags |= NodeDumpOptions::DumpFlags::StatisticsData; } + if (ParseEnvironmentVariableWithDefault(env_vars::kDumpHalfConversionOverflow, false)) { + // Statistics data is required for half conversion overflow detection. + opts.dump_flags |= NodeDumpOptions::DumpFlags::StatisticsData; + opts.dump_flags |= NodeDumpOptions::DumpFlags::HalfConversionOverflow; + } opts.filter.name_pattern = Env::Default().GetEnvironmentVar(env_vars::kNameFilter); opts.filter.op_type_pattern = Env::Default().GetEnvironmentVar(env_vars::kOpTypeFilter); @@ -402,6 +480,13 @@ const NodeDumpOptions& NodeDumpOptionsFromEnvironmentVariables() { opts.snippet_threshold = ParseEnvironmentVariableWithDefault(env_vars::kSnippetThreshold, kDefaultSnippetThreshold); opts.snippet_edge_items = ParseEnvironmentVariableWithDefault(env_vars::kSnippetEdgeItems, kDefaultSnippetEdgeItems); + constexpr int kMaxHalfThreshold = 65504; + // The default value is set to have reasonable margin for input variance. + int threshold = ParseEnvironmentVariableWithDefault(env_vars::kHalfOverflowThreshold, 50000); + ORT_ENFORCE(threshold > 0 && threshold <= kMaxHalfThreshold, + debug_node_inputs_outputs_env_vars::kHalfOverflowThreshold, " shall be a positive integer <= ", kMaxHalfThreshold); + opts.half_overflow_threshold = static_cast(threshold); + if (ParseEnvironmentVariableWithDefault(env_vars::kAppendRankToFileName, false)) { std::string rank = Env::Default().GetEnvironmentVar("OMPI_COMM_WORLD_RANK"); if (rank.empty()) { @@ -452,7 +537,8 @@ void DumpNodeInputs( const NodeDumpContext& dump_context, const OpKernelContext& context, const Node& node, - const SessionState& session_state) { + const SessionState& session_state, + NodeDumpAnalysis& dump_analysis) { const bool is_any_output_dumped = IsAnyOutputDumped(dump_options); if (!is_any_output_dumped) { return; @@ -477,6 +563,9 @@ void DumpNodeInputs( const auto& input_defs = node.InputDefs(); TensorMetadata tensor_metadata; + bool check_half_overflow = (dump_options.data_destination == NodeDumpOptions::DataDestination::StdOut) && + (dump_options.dump_flags & NodeDumpOptions::DumpFlags::HalfConversionOverflow) != 0; + bool potential_half_overflow = false; for (auto i = 0, end = context.InputCount(); i < end; ++i) { if (input_defs[i]->Exists()) { std::cout << "Input " << i << " Name: " << input_defs[i]->Name() << "\n"; @@ -491,11 +580,20 @@ void DumpNodeInputs( const bool is_shape_set = (dump_options.dump_flags & NodeDumpOptions::DumpFlags::Shape) != 0; PrintIf(is_shape_set, MakeString(" Shape: ", shape, "\n")); - if ((dump_options.dump_flags & NodeDumpOptions::DumpFlags::InputData) != 0) { + if ((dump_options.dump_flags & NodeDumpOptions::DumpFlags::InputData) != 0 || check_half_overflow) { tensor_metadata.name = input_defs[i]->Name(); tensor_metadata.step = dump_context.iteration; tensor_metadata.consumer = node.Name() + ":" + std::to_string(i); - DumpTensor(dump_options, *tensor, tensor_metadata, session_state); + + TensorStatisticsData tensor_statistics; + DumpTensor(dump_options, *tensor, tensor_metadata, tensor_statistics, session_state); + + if 
(check_half_overflow && tensor_statistics.is_float) { + float threshold = dump_options.half_overflow_threshold; + if (tensor_statistics.float_min < -threshold || tensor_statistics.float_max > threshold) { + potential_half_overflow = true; + } + } } } else { std::cout << " is empty optional tensor.\n"; @@ -511,14 +609,19 @@ void DumpNodeInputs( std::cout << "Input " << i << " is optional and was not provided.\n"; } } + + if (check_half_overflow) { + dump_analysis.Add(node.Name(), node.OpType(), potential_half_overflow); + } } void DumpNodeInputs( const NodeDumpContext& dump_context, const OpKernelContext& context, const Node& node, - const SessionState& session_state) { - DumpNodeInputs(NodeDumpOptionsFromEnvironmentVariables(), dump_context, context, node, session_state); + const SessionState& session_state, + NodeDumpAnalysis& dump_analysis) { + DumpNodeInputs(NodeDumpOptionsFromEnvironmentVariables(), dump_context, context, node, session_state, dump_analysis); } void DumpNodeOutputs( @@ -526,7 +629,8 @@ void DumpNodeOutputs( const NodeDumpContext& dump_context, OpKernelContext& context, const Node& node, - const SessionState& session_state) { + const SessionState& session_state, + NodeDumpAnalysis& dump_analysis) { const bool is_any_output_dumped = IsAnyOutputDumped(dump_options); if (!is_any_output_dumped) { return; @@ -549,6 +653,9 @@ void DumpNodeOutputs( const auto& output_defs = node.OutputDefs(); TensorMetadata tensor_metadata; + bool check_half_overflow = (dump_options.data_destination == NodeDumpOptions::DataDestination::StdOut) && + (dump_options.dump_flags & NodeDumpOptions::DumpFlags::HalfConversionOverflow) != 0; + bool potential_half_overflow = false; for (auto i = 0, end = context.OutputCount(); i < end; ++i) { if (output_defs[i]->Exists()) { std::cout << "Output " << i << " Name: " << output_defs[i]->Name() << "\n"; @@ -562,11 +669,20 @@ void DumpNodeOutputs( const bool is_shape_set = (dump_options.dump_flags & NodeDumpOptions::DumpFlags::Shape) != 0; PrintIf(is_shape_set, MakeString(" Shape: ", shape, "\n")); - if ((dump_options.dump_flags & NodeDumpOptions::DumpFlags::OutputData) != 0) { + if ((dump_options.dump_flags & NodeDumpOptions::DumpFlags::OutputData) != 0 || check_half_overflow) { tensor_metadata.name = output_defs[i]->Name(); tensor_metadata.step = dump_context.iteration; tensor_metadata.producer = node.Name() + ":" + std::to_string(i); - DumpTensor(dump_options, *tensor, tensor_metadata, session_state); + + TensorStatisticsData tensor_statistics; + DumpTensor(dump_options, *tensor, tensor_metadata, tensor_statistics, session_state); + + if (check_half_overflow && tensor_statistics.is_float) { + float threshold = dump_options.half_overflow_threshold; + if (tensor_statistics.float_min < -threshold || tensor_statistics.float_max > threshold) { + potential_half_overflow = true; + } + } } } else { std::cout << " is empty optional tensor.\n"; @@ -582,6 +698,10 @@ void DumpNodeOutputs( std::cout << "Output " << i << " is optional and was not produced.\n"; } + if (check_half_overflow) { + dump_analysis.Add(node.Name(), node.OpType(), potential_half_overflow); + } + std::cout << std::endl; } } @@ -590,8 +710,9 @@ void DumpNodeOutputs( const NodeDumpContext& dump_context, OpKernelContext& context, const Node& node, - const SessionState& session_state) { - DumpNodeOutputs(NodeDumpOptionsFromEnvironmentVariables(), dump_context, context, node, session_state); + const SessionState& session_state, + NodeDumpAnalysis& dump_analysis) { + 
DumpNodeOutputs(NodeDumpOptionsFromEnvironmentVariables(), dump_context, context, node, session_state, dump_analysis);
 }
 
 }  // namespace utils
 
diff --git a/onnxruntime/core/framework/debug_node_inputs_outputs_utils.h b/onnxruntime/core/framework/debug_node_inputs_outputs_utils.h
index 6090a835aa060..2ea7d59ad620e 100644
--- a/onnxruntime/core/framework/debug_node_inputs_outputs_utils.h
+++ b/onnxruntime/core/framework/debug_node_inputs_outputs_utils.h
@@ -19,6 +19,10 @@
 #include "core/framework/op_kernel.h"
 #include "core/framework/session_state.h"
 #include "core/graph/graph.h"
+#include <mutex>
+#include <string>
+#include <unordered_map>
+#include <unordered_set>
 
 namespace onnxruntime {
 namespace utils {
@@ -37,6 +41,8 @@ constexpr const char* kDumpInputData = "ORT_DEBUG_NODE_IO_DUMP_INPUT_DATA";
 constexpr const char* kDumpOutputData = "ORT_DEBUG_NODE_IO_DUMP_OUTPUT_DATA";
 // Output statistics data like min, max, count of NaN, count of infinity etc.
 constexpr const char* kDumpStatisticsData = "ORT_DEBUG_NODE_IO_DUMP_STATISTICS_DATA";
+// Output node name when any float input or output exceeds a threshold for float16 conversion overflow.
+constexpr const char* kDumpHalfConversionOverflow = "ORT_DEBUG_NODE_IO_DUMP_HALF_CONVERSION_OVERFLOW";
 
 // specify a node name filter to limit the nodes that are dumped
 // see NodeDumpOptions::FilterOptions
@@ -61,6 +67,10 @@ constexpr const char* kSnippetThreshold = "ORT_DEBUG_NODE_IO_SNIPPET_THRESHOLD";
 // Number of array items in snippet at beginning and end of each dimension (default 3)
 constexpr const char* kSnippetEdgeItems = "ORT_DEBUG_NODE_IO_SNIPPET_EDGE_ITEMS";
 
+// Threshold for float to float16 conversion overflow detection (default 50000).
+// It is a positive integer <= 65504; it is recommended to leave some margin for new inputs.
+constexpr const char* kHalfOverflowThreshold = "ORT_DEBUG_NODE_IO_HALF_OVERFLOW_THRESHOLD";
+
 }  // namespace debug_node_inputs_outputs_env_vars
 
 constexpr char kFilterPatternDelimiter = ';';
@@ -73,7 +83,8 @@ struct NodeDumpOptions {
     OutputData = 1 << 2,
     NodePlacement = 1 << 3,
     StatisticsData = 1 << 4,
-    AllData = Shape | InputData | OutputData | NodePlacement | StatisticsData,
+    HalfConversionOverflow = 1 << 5,
+    AllData = Shape | InputData | OutputData | NodePlacement | StatisticsData | HalfConversionOverflow,
   };
 
   // specifies the information to dump per node
@@ -117,6 +128,9 @@ struct NodeDumpOptions {
   // Number of array items in snippet at beginning and end of each dimension for Stdout.
   int snippet_edge_items;
+
+  // Threshold for float16 conversion overflow.
+  float half_overflow_threshold;
 };
 
 struct NodeDumpContext {
   // which execution pass are we on?
   size_t iteration;
   // which node are we on?
   size_t program_counter;
 };
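Putting the pieces together, a sketch of how the new analysis is driven (assumes a build with `DEBUG_NODE_INPUTS_OUTPUTS` defined and a POSIX `setenv`; the environment variable names are the ones declared above, and the model path is hypothetical):

```cpp
#include <cstdlib>
#include <onnxruntime_cxx_api.h>

int main() {
  // Parsed by NodeDumpOptionsFromEnvironmentVariables() at session start.
  setenv("ORT_DEBUG_NODE_IO_DUMP_HALF_CONVERSION_OVERFLOW", "1", /*overwrite*/ 1);
  setenv("ORT_DEBUG_NODE_IO_HALF_OVERFLOW_THRESHOLD", "50000", 1);  // the default from this patch

  Ort::Env env;
  Ort::SessionOptions options;
  Ort::Session session(env, "model.onnx", options);  // hypothetical model path
  // ... run session.Run(...) as usual; when the session scope is torn down,
  // NodeDumpAnalysis::PrintToStdOut emits the suggested node_block_list script.
  return 0;
}
```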
+// A session level analysis of node dumps. It can be used to collect some statistics or analysis during node dumps.
+struct NodeDumpAnalysis {
+  std::mutex set_mutex;
+  std::unordered_set<std::string> half_overflow_nodes;
+  std::unordered_map<std::string, int> half_overflow_ops;
+  int counter{0};
+  void Add(const std::string& node_name, const std::string& op_name, bool is_half_overflow);
+  void PrintToStdOut(const std::string& model_path);
+};
+
 // gets NodeDumpOptions instance configured from environment variable values
 const NodeDumpOptions& NodeDumpOptionsFromEnvironmentVariables();
 
@@ -135,13 +159,15 @@ void DumpNodeInputs(
     const NodeDumpContext& dump_context,
     const OpKernelContext& context,
     const Node& node,
-    const SessionState& session_state);
+    const SessionState& session_state,
+    NodeDumpAnalysis& dump_analysis);
 
 void DumpNodeInputs(
     const NodeDumpContext& dump_context,
     const OpKernelContext& context,
     const Node& node,
-    const SessionState& session_state);
+    const SessionState& session_state,
+    NodeDumpAnalysis& dump_analysis);
 
 // dumps outputs for a node
 void DumpNodeOutputs(
@@ -149,13 +175,15 @@ void DumpNodeOutputs(
     const NodeDumpContext& dump_context,
     OpKernelContext& context,
     const Node& node,
-    const SessionState& session_state);
+    const SessionState& session_state,
+    NodeDumpAnalysis& dump_analysis);
 
 void DumpNodeOutputs(
     const NodeDumpContext& dump_context,
     OpKernelContext& context,
     const Node& node,
-    const SessionState& session_state);
+    const SessionState& session_state,
+    NodeDumpAnalysis& dump_analysis);
 
 }  // namespace utils
 }  // namespace onnxruntime
diff --git a/onnxruntime/core/framework/print_tensor_statistics_utils.h b/onnxruntime/core/framework/print_tensor_statistics_utils.h
index 65360674e88d0..c2030424ef19d 100644
--- a/onnxruntime/core/framework/print_tensor_statistics_utils.h
+++ b/onnxruntime/core/framework/print_tensor_statistics_utils.h
@@ -8,6 +8,14 @@
 namespace onnxruntime {
 namespace utils {
 
+// Currently we only store statistics data for float tensors that are printed to stdout.
+// It can be extended to other types if needed.
+struct TensorStatisticsData {
+  bool is_float = false;
+  float float_min;
+  float float_max;
+};
+
 template <typename T>
 int my_fpclassify(const T& val) {
   return std::fpclassify(val);
@@ -30,7 +38,7 @@ void PrintFloatStats(const T* data, size_t count) {
   size_t zero = 0;
   size_t subnormal = 0;
   for (size_t i = 0; i < count; i++) {
-    switch (my_fpclassify(*data)) {
+    switch (my_fpclassify(data[i])) {
       case FP_INFINITE:
         inf++;
         break;
@@ -59,7 +67,7 @@ void PrintFloatStats(const T* data, size_t count) {
 }
 
 template <typename T>
-void PrintCommonStats(const T* data, size_t count) {
+void PrintCommonStats(const T* data, size_t count, TensorStatisticsData& tensor_statistics) {
   T min = data[0];
   T max = min;
   for (size_t i = 1; i < count; i++) {
@@ -77,30 +85,38 @@ void PrintCommonStats(const T* data, size_t count) {
 
   std::cout << ",Max=";
   PrintValue(max);
+
+  // Statistics for float and double only for now.
+ if constexpr (std::is_same::value) { + tensor_statistics.is_float = true; + tensor_statistics.float_min = static_cast(min); + tensor_statistics.float_max = static_cast(max); + } } -#define DEF_PRINT_COMMON_STATS_INT4(INT4_TYPE) \ - template <> \ - inline void PrintCommonStats(const INT4_TYPE* data, size_t count) { \ - using UnpackedType = typename INT4_TYPE::UnpackedType; \ - UnpackedType min = data[0].GetElem(0); \ - UnpackedType max = min; \ - for (size_t i = 1; i < count; i++) { \ - auto indices = INT4_TYPE::GetTensorElemIndices(i); \ - auto value = data[indices.first].GetElem(indices.second); \ - if (value > max) { \ - max = value; \ - } \ - if (value < min) { \ - min = value; \ - } \ - } \ - \ - std::cout << "Min="; \ - PrintValue(min); \ - \ - std::cout << ",Max="; \ - PrintValue(max); \ +#define DEF_PRINT_COMMON_STATS_INT4(INT4_TYPE) \ + template <> \ + inline void PrintCommonStats( \ + const INT4_TYPE* data, size_t count, TensorStatisticsData&) { \ + using UnpackedType = typename INT4_TYPE::UnpackedType; \ + UnpackedType min = data[0].GetElem(0); \ + UnpackedType max = min; \ + for (size_t i = 1; i < count; i++) { \ + auto indices = INT4_TYPE::GetTensorElemIndices(i); \ + auto value = data[indices.first].GetElem(indices.second); \ + if (value > max) { \ + max = value; \ + } \ + if (value < min) { \ + min = value; \ + } \ + } \ + \ + std::cout << "Min="; \ + PrintValue(min); \ + \ + std::cout << ",Max="; \ + PrintValue(max); \ } DEF_PRINT_COMMON_STATS_INT4(Int4x2) @@ -129,36 +145,36 @@ void PrintHalfStats(const T* data, size_t count) { } template -void PrintTensorStats(const T* tensor, size_t count) { - PrintCommonStats(tensor, count); +void PrintTensorStats(const T* tensor, size_t count, TensorStatisticsData& tensor_statistics) { + PrintCommonStats(tensor, count, tensor_statistics); } template <> -void PrintTensorStats(const float* tensor, size_t count) { - PrintCommonStats(tensor, count); +void PrintTensorStats(const float* tensor, size_t count, TensorStatisticsData& tensor_statistics) { + PrintCommonStats(tensor, count, tensor_statistics); PrintFloatStats(tensor, count); } template <> -void PrintTensorStats(const double* tensor, size_t count) { - PrintCommonStats(tensor, count); +void PrintTensorStats(const double* tensor, size_t count, TensorStatisticsData& tensor_statistics) { + PrintCommonStats(tensor, count, tensor_statistics); PrintFloatStats(tensor, count); } template <> -void PrintTensorStats(const MLFloat16* tensor, size_t count) { +void PrintTensorStats(const MLFloat16* tensor, size_t count, TensorStatisticsData&) { PrintHalfStats(tensor, count); PrintFloatStats(tensor, count); } template <> -void PrintTensorStats(const BFloat16* tensor, size_t count) { +void PrintTensorStats(const BFloat16* tensor, size_t count, TensorStatisticsData&) { PrintHalfStats(tensor, count); PrintFloatStats(tensor, count); } template -void PrintCpuTensorStats(const Tensor& tensor) { +void PrintCpuTensorStats(const Tensor& tensor, TensorStatisticsData& tensor_statistics) { const auto& shape = tensor.Shape(); auto num_items = shape.Size(); if (num_items == 0) { @@ -166,12 +182,12 @@ void PrintCpuTensorStats(const Tensor& tensor) { } const T* data = tensor.Data(); - PrintTensorStats(data, num_items); + PrintTensorStats(data, num_items, tensor_statistics); std::cout << std::endl; } template <> -void PrintCpuTensorStats(const Tensor&) { +void PrintCpuTensorStats(const Tensor&, TensorStatisticsData&) { } } // namespace utils diff --git a/onnxruntime/core/framework/sequential_executor.cc 
b/onnxruntime/core/framework/sequential_executor.cc index 2185b8332b9cf..61fd9b08655b7 100644 --- a/onnxruntime/core/framework/sequential_executor.cc +++ b/onnxruntime/core/framework/sequential_executor.cc @@ -239,6 +239,10 @@ class SessionScope { << i.second << " bytes for " << i.first << std::endl; } #endif + +#ifdef DEBUG_NODE_INPUTS_OUTPUTS + dump_analysis_.PrintToStdOut(session_state_.GetGraphViewer().ModelPath().string()); +#endif } #if !defined(ORT_MINIMAL_BUILD) && defined(ORT_MEMORY_PROFILE) @@ -269,6 +273,7 @@ class SessionScope { #ifdef DEBUG_NODE_INPUTS_OUTPUTS utils::NodeDumpContext dump_context_; + utils::NodeDumpAnalysis dump_analysis_; #endif }; @@ -329,7 +334,7 @@ class KernelScope { #endif #ifdef DEBUG_NODE_INPUTS_OUTPUTS - utils::DumpNodeInputs(dump_context_, kernel_context_, kernel_.Node(), session_state_); + utils::DumpNodeInputs(dump_context_, kernel_context_, kernel_.Node(), session_state_, session_scope_.dump_analysis_); #endif #ifdef ENABLE_NVTX_PROFILE @@ -392,7 +397,7 @@ class KernelScope { #endif #ifdef DEBUG_NODE_INPUTS_OUTPUTS - utils::DumpNodeOutputs(dump_context_, kernel_context_, kernel_.Node(), session_state_); + utils::DumpNodeOutputs(dump_context_, kernel_context_, kernel_.Node(), session_state_, session_scope_.dump_analysis_); #endif } //~KernelScope diff --git a/onnxruntime/core/framework/session_state.cc b/onnxruntime/core/framework/session_state.cc index d7059bf848e83..27aa4f38a4ec9 100644 --- a/onnxruntime/core/framework/session_state.cc +++ b/onnxruntime/core/framework/session_state.cc @@ -101,7 +101,7 @@ SessionState::SessionState(Graph& graph, for (auto& ep : execution_providers_) { auto allocators = ep->CreatePreferredAllocators(); for (auto& alloc : allocators) { - allocators_->insert({alloc->Info().device, alloc}); // DONT overwrite existing key + allocators_->insert({alloc->Info().device, alloc}); // DON'T overwrite existing key } } } diff --git a/onnxruntime/core/graph/function_template.h b/onnxruntime/core/graph/function_template.h index 978174d943f14..0d3fee18d5d59 100644 --- a/onnxruntime/core/graph/function_template.h +++ b/onnxruntime/core/graph/function_template.h @@ -2,7 +2,7 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. -#include "onnx/onnx_pb.h" +#include "core/graph/onnx_protobuf.h" namespace onnxruntime { diff --git a/onnxruntime/core/graph/graph.cc b/onnxruntime/core/graph/graph.cc index 9fee3e49dc0d2..7ee794ccbd2e8 100644 --- a/onnxruntime/core/graph/graph.cc +++ b/onnxruntime/core/graph/graph.cc @@ -50,7 +50,9 @@ namespace onnxruntime { #define NO_CHANGE_ON_SYNC_FLAG(...) \ do { \ const bool sync_needed = GraphProtoSyncNeeded(); \ - { __VA_ARGS__; } \ + { \ + __VA_ARGS__; \ + } \ GraphProtoSyncNeeded(sync_needed); \ } while (0) diff --git a/onnxruntime/core/mlas/inc/mlas.h b/onnxruntime/core/mlas/inc/mlas.h index 207c058d899b4..7e0335cc66ef0 100644 --- a/onnxruntime/core/mlas/inc/mlas.h +++ b/onnxruntime/core/mlas/inc/mlas.h @@ -1458,7 +1458,107 @@ MlasRotaryEmbedOneRow( T* output ); - /** +/** + * @brief Supply matrices data information to half precision gemm functions + */ +struct MLAS_HGEMM_DATA_PARAMS { + const MLAS_FP16* A; /**< Supplies the address of matrix A */ + size_t lda; /**< Supplies the first dimension of matrix A. */ + const MLAS_FP16* B; /**< Supplies the address of matrix B */ + size_t ldb; /**< Supplies the first dimension of matrix B. */ + MLAS_FP16* C; /**< Supplies the address of matrix C */ + size_t ldc; /**< Supplies the first dimension of matrix C. 
*/
+    uint16_t alpha;  /**< Supplies the scalar alpha multiplier (see GEMM definition). FP16 encoding. */
+    uint16_t beta;   /**< Supplies the scalar beta multiplier (see GEMM definition). FP16 encoding. */
+};
+
+/**
+ * @brief Check whether current CPU supports half precision gemm.
+ */
+bool
+MLASCALL
+MlasHGemmSupported(
+    CBLAS_TRANSPOSE TransA,
+    CBLAS_TRANSPOSE TransB
+    );
+
+/**
+ * @brief Batched half precision matrix/matrix multiply operation (HGEMM)
+ *
+ * @param TransA Supplies the transpose operation for matrix A.
+ * @param TransB Supplies the transpose operation for matrix B.
+ * @param M Supplies the number of rows of matrix A and matrix C.
+ * @param N Supplies the number of columns of matrix B and matrix C.
+ * @param K Supplies the number of columns of matrix A and the number of rows of matrix B.
+ * @param Data An array of matrix data parameters.
+ * @param BatchSize Supplies the number of multiplications in this batch.
+ * @param ThreadPool Supplies the thread pool object to use, else nullptr if the
+                     base library threading support should be used.
+ */
+void
+MLASCALL
+MlasGemmBatch(
+    CBLAS_TRANSPOSE TransA,
+    CBLAS_TRANSPOSE TransB,
+    size_t M,
+    size_t N,
+    size_t K,
+    const MLAS_HGEMM_DATA_PARAMS* Data,
+    size_t BatchSize,
+    MLAS_THREADPOOL* ThreadPool
+    );
+
+/**
+ * @brief half precision matrix/matrix multiply operation (HGEMM)
+ *        C = alpha * op(A) * op(B) + beta * C
+ *
+ * @param TransA Supplies the transpose operation for matrix A. Currently only CblasNoTrans is supported.
+ * @param TransB Supplies the transpose operation for matrix B. Currently only CblasTrans is supported.
+ * @param M Supplies the number of rows of matrix A and matrix C.
+ * @param N Supplies the number of columns of matrix B and matrix C.
+ * @param K Supplies the number of columns of matrix A and the number of rows of matrix B.
+ * @param A Supplies the address of matrix A.
+ * @param lda Supplies the first dimension of matrix A.
+ * @param B Supplies the address of matrix B.
+ * @param ldb Supplies the first dimension of matrix B.
+ * @param C Supplies the address of matrix C.
+ * @param ldc Supplies the first dimension of matrix C.
+ * @param alpha Supplies the scalar alpha multiplier (see GEMM definition).
+ * @param beta Supplies the scalar beta multiplier (see GEMM definition).
+ * @param ThreadPool Supplies the thread pool object to use, else nullptr if the base library threading support
+ *                   should be used.
+ */
+inline
+void
+MlasGemm(
+    CBLAS_TRANSPOSE TransA,
+    CBLAS_TRANSPOSE TransB,
+    size_t M,
+    size_t N,
+    size_t K,
+    const MLAS_FP16* A,
+    size_t lda,
+    const MLAS_FP16* B,
+    size_t ldb,
+    MLAS_FP16* C,
+    size_t ldc,
+    uint16_t alpha,
+    uint16_t beta,
+    MLAS_THREADPOOL* ThreadPool
+) {
+    MLAS_HGEMM_DATA_PARAMS Data;
+    Data.A = A;
+    Data.lda = lda;
+    Data.B = B;
+    Data.ldb = ldb;
+    Data.C = C;
+    Data.ldc = ldc;
+    Data.alpha = alpha;
+    Data.beta = beta;
+    MlasGemmBatch(TransA, TransB, M, N, K, &Data, 1, ThreadPool);
+}
+
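A usage sketch for the new entry points (illustration only; note that `alpha` and `beta` take raw FP16 bit patterns, so `0x3C00` is 1.0 and `0x0000` is 0.0):

```cpp
#include <vector>
#include "mlas.h"

void RunHGemmExample(MLAS_THREADPOOL* thread_pool) {
  const size_t M = 4, N = 8, K = 16;
  std::vector<MLAS_FP16> A(M * K), B(N * K), C(M * N);  // B is used as Transpose(B): N x K

  if (MlasHGemmSupported(CblasNoTrans, CblasTrans)) {
    // alpha = 1.0 (0x3C00), beta = 0.0 (0x0000) in FP16 encoding.
    MlasGemm(CblasNoTrans, CblasTrans, M, N, K,
             A.data(), /*lda=*/K, B.data(), /*ldb=*/K, C.data(), /*ldc=*/N,
             /*alpha=*/0x3C00, /*beta=*/0x0000, thread_pool);
  }
}
```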
+/**
 * @brief Whether current CPU supports FP16 acceleration.
 */
 bool
 MLASCALL
diff --git a/onnxruntime/core/mlas/lib/fp16_common.h b/onnxruntime/core/mlas/lib/fp16_common.h
index f4c49905ebbd7..acee567162b9d 100644
--- a/onnxruntime/core/mlas/lib/fp16_common.h
+++ b/onnxruntime/core/mlas/lib/fp16_common.h
@@ -349,4 +349,103 @@ MlasBitwiseSelectFloat16x4(MLAS_UINT16X4 select, MLAS_FLOAT16X4 ones, MLAS_FLOAT16X4 zeros)
     return vbsl_f16(select, ones, zeros);
 }
 
+MLAS_FORCEINLINE
+void
+Transpose8x8(MLAS_FLOAT16X8& v0, MLAS_FLOAT16X8& v1, MLAS_FLOAT16X8& v2, MLAS_FLOAT16X8& v3,
+             MLAS_FLOAT16X8& v4, MLAS_FLOAT16X8& v5, MLAS_FLOAT16X8& v6, MLAS_FLOAT16X8& v7)
+{
+    // |v00|v01|v02|v03|v04|v05|v06|v07|
+    // |v10|v11|v12|v13|v14|v15|v16|v17|
+    // |v20|v21|v22|v23|v24|v25|v26|v27|
+    // |v30|v31|v32|v33|v34|v35|v36|v37|
+    // |v40|v41|v42|v43|v44|v45|v46|v47|
+    // |v50|v51|v52|v53|v54|v55|v56|v57|
+    // |v60|v61|v62|v63|v64|v65|v66|v67|
+    // |v70|v71|v72|v73|v74|v75|v76|v77|
+    float16x8x2_t t01 = vtrnq_f16(v0, v1);
+    float16x8x2_t t23 = vtrnq_f16(v2, v3);
+    float16x8x2_t t45 = vtrnq_f16(v4, v5);
+    float16x8x2_t t67 = vtrnq_f16(v6, v7);
+    // |v00|v10|v02|v12|v04|v14|v06|v16|
+    // |v01|v11|v03|v13|v05|v15|v07|v17|
+    // |v20|v30|v22|v32|v24|v34|v26|v36|
+    // |v21|v31|v23|v33|v25|v35|v27|v37|
+    // |v40|v50|v42|v52|v44|v54|v46|v56|
+    // |v41|v51|v43|v53|v45|v55|v47|v57|
+    // |v60|v70|v62|v72|v64|v74|v66|v76|
+    // |v61|v71|v63|v73|v65|v75|v67|v77|
+    float32x4x2_t t02 = vtrnq_f32(vreinterpretq_f32_f16(t01.val[0]), vreinterpretq_f32_f16(t23.val[0]));
+    float32x4x2_t t13 = vtrnq_f32(vreinterpretq_f32_f16(t01.val[1]), vreinterpretq_f32_f16(t23.val[1]));
+    float32x4x2_t t46 = vtrnq_f32(vreinterpretq_f32_f16(t45.val[0]), vreinterpretq_f32_f16(t67.val[0]));
+    float32x4x2_t t57 = vtrnq_f32(vreinterpretq_f32_f16(t45.val[1]), vreinterpretq_f32_f16(t67.val[1]));
+    // |v00|v10|v20|v30|v04|v14|v24|v34|
+    // |v01|v11|v21|v31|v05|v15|v25|v35|
+    // |v02|v12|v22|v32|v06|v16|v26|v36|
+    // |v03|v13|v23|v33|v07|v17|v27|v37|
+    // |v40|v50|v60|v70|v44|v54|v64|v74|
+    // |v41|v51|v61|v71|v45|v55|v65|v75|
+    // |v42|v52|v62|v72|v46|v56|v66|v76|
+    // |v43|v53|v63|v73|v47|v57|v67|v77|
+    v0 = vreinterpretq_f16_f64(vtrn1q_f64(vreinterpretq_f64_f32(t02.val[0]), vreinterpretq_f64_f32(t46.val[0])));
+    v4 = vreinterpretq_f16_f64(vtrn2q_f64(vreinterpretq_f64_f32(t02.val[0]), vreinterpretq_f64_f32(t46.val[0])));
+    v2 = vreinterpretq_f16_f64(vtrn1q_f64(vreinterpretq_f64_f32(t02.val[1]), vreinterpretq_f64_f32(t46.val[1])));
+    v6 = vreinterpretq_f16_f64(vtrn2q_f64(vreinterpretq_f64_f32(t02.val[1]), vreinterpretq_f64_f32(t46.val[1])));
+    v1 = vreinterpretq_f16_f64(vtrn1q_f64(vreinterpretq_f64_f32(t13.val[0]), vreinterpretq_f64_f32(t57.val[0])));
+    v5 = vreinterpretq_f16_f64(vtrn2q_f64(vreinterpretq_f64_f32(t13.val[0]), vreinterpretq_f64_f32(t57.val[0])));
+    v3 = vreinterpretq_f16_f64(vtrn1q_f64(vreinterpretq_f64_f32(t13.val[1]), vreinterpretq_f64_f32(t57.val[1])));
+    v7 = vreinterpretq_f16_f64(vtrn2q_f64(vreinterpretq_f64_f32(t13.val[1]), vreinterpretq_f64_f32(t57.val[1])));
+    // |v00|v10|v20|v30|v40|v50|v60|v70|
+    // |v01|v11|v21|v31|v41|v51|v61|v71|
+    // |v02|v12|v22|v32|v42|v52|v62|v72|
+    // |v03|v13|v23|v33|v43|v53|v63|v73|
+    // |v04|v14|v24|v34|v44|v54|v64|v74|
+    // |v05|v15|v25|v35|v45|v55|v65|v75|
+    // |v06|v16|v26|v36|v46|v56|v66|v76|
+    // |v07|v17|v27|v37|v47|v57|v67|v77|
+}
+
+MLAS_FORCEINLINE
+void
+Transpose4x8(MLAS_FLOAT16X8& v0, MLAS_FLOAT16X8& v1, MLAS_FLOAT16X8& v2, MLAS_FLOAT16X8& v3)
+{
+    // |v00|v01|v02|v03|v04|v05|v06|v07|
+    // |v10|v11|v12|v13|v14|v15|v16|v17|
+    // |v20|v21|v22|v23|v24|v25|v26|v27|
+ // |v30|v31|v32|v33|v34|v35|v36|v37| + // => + // |v00|v10|v20|v30|v04|v14|v24|v34| + // |v01|v11|v21|v31|v05|v15|v25|v35| + // |v02|v12|v22|v32|v06|v16|v26|v36| + // |v03|v13|v23|v33|v07|v17|v27|v37| + float16x8x2_t t01 = vtrnq_f16(v0, v1); + float16x8x2_t t23 = vtrnq_f16(v2, v3); + + v0 = vreinterpretq_f16_f32(vtrn1q_f32(vreinterpretq_f32_f16(t01.val[0]), vreinterpretq_f32_f16(t23.val[0]))); + v2 = vreinterpretq_f16_f32(vtrn2q_f32(vreinterpretq_f32_f16(t01.val[0]), vreinterpretq_f32_f16(t23.val[0]))); + v1 = vreinterpretq_f16_f32(vtrn1q_f32(vreinterpretq_f32_f16(t01.val[1]), vreinterpretq_f32_f16(t23.val[1]))); + v3 = vreinterpretq_f16_f32(vtrn2q_f32(vreinterpretq_f32_f16(t01.val[1]), vreinterpretq_f32_f16(t23.val[1]))); +} + +MLAS_FORCEINLINE +void +Transpose4x4(MLAS_FLOAT16X4& v0, MLAS_FLOAT16X4& v1, MLAS_FLOAT16X4& v2, MLAS_FLOAT16X4& v3) +{ + // |v00|v01|v02|v03| + // |v10|v11|v12|v13| + // |v20|v21|v22|v23| + // |v30|v31|v32|v33| + // => + // |v00|v10|v20|v30| + // |v01|v11|v21|v31| + // |v02|v12|v22|v32| + // |v03|v13|v23|v33| + float16x4x2_t t01 = vtrn_f16(v0, v1); + float16x4x2_t t23 = vtrn_f16(v2, v3); + + v0 = vreinterpret_f16_f32(vtrn1_f32(vreinterpret_f32_f16(t01.val[0]), vreinterpret_f32_f16(t23.val[0]))); + v1 = vreinterpret_f16_f32(vtrn1_f32(vreinterpret_f32_f16(t01.val[1]), vreinterpret_f32_f16(t23.val[1]))); + v2 = vreinterpret_f16_f32(vtrn2_f32(vreinterpret_f32_f16(t01.val[0]), vreinterpret_f32_f16(t23.val[0]))); + v3 = vreinterpret_f16_f32(vtrn2_f32(vreinterpret_f32_f16(t01.val[1]), vreinterpret_f32_f16(t23.val[1]))); +} + #endif // fp16 vector intrinsic supported diff --git a/onnxruntime/core/mlas/lib/halfgemm.cpp b/onnxruntime/core/mlas/lib/halfgemm.cpp index 49387d2fc998f..65ab0e9ce4630 100644 --- a/onnxruntime/core/mlas/lib/halfgemm.cpp +++ b/onnxruntime/core/mlas/lib/halfgemm.cpp @@ -324,6 +324,176 @@ MlasHalfGemmKernel( } } +bool +MLASCALL +MlasHGemmSupported( + CBLAS_TRANSPOSE TransA, + CBLAS_TRANSPOSE TransB +) { + auto* dispatch = GetMlasPlatform().HGemmDispatch; + if (TransA == CblasNoTrans && TransB == CblasTrans) { + return dispatch && + dispatch->HGemmKernel_TransposedB && + dispatch->HPackBKernel_TransposedB && + dispatch->HGemmKernel_TransposedPackedB; + } + + return false; +} + +void +HGemmOperation( + CBLAS_TRANSPOSE TransA, + CBLAS_TRANSPOSE TransB, + size_t K, // full K slice + const MLAS_HGEMM_DATA_PARAMS* DataParams, + const size_t RangeStartM, + const size_t RangeCountM, + const size_t RangeStartN, + const size_t RangeCountN +) { + const size_t lda = DataParams->lda; + const size_t ldb = DataParams->ldb; + const size_t ldc = DataParams->ldc; + const _mlas_fp16_ alpha = DataParams->alpha; + const _mlas_fp16_ beta = DataParams->beta; + auto* dispatch = GetMlasPlatform().HGemmDispatch; + constexpr size_t StrideM = 2; + const auto beta_add = MLAS_FP16(1.0f); + constexpr size_t buffer_size = MLAS_HGEMM_STRIDEN * MLAS_HGEMM_STRIDEK; + MLAS_DECLSPEC_ALIGN(MLAS_FP16 PackedB[buffer_size], 16 * sizeof(_mlas_fp16_)); + + if (TransA == CblasNoTrans && TransB == CblasTrans) { + const auto* A = DataParams->A + RangeStartM * lda; + const auto* B = DataParams->B + RangeStartN * ldb; + auto* C = DataParams->C + RangeStartM * ldc + RangeStartN; + + if (RangeCountM <= StrideM) { + if (!dispatch || !dispatch->HGemmKernel_TransposedB) { + MLAS_THROW_EX(std::runtime_error, "hgemm does not have A x Transposed(B) kernels"); + } + // When M is small, B is visited once. The overhead of Pack(B') exceeds the benefits + // from A x Pack(B'). 
Therefore directly calculate A x B'.
+            // Without PackB, to utilize memory locality, iterate full K.
+            constexpr size_t StrideN = 16;
+            for (size_t n = 0, countN; n < RangeCountN; n += countN) {
+                countN = std::min(StrideN, RangeCountN - n);
+                dispatch->HGemmKernel_TransposedB(A, B, C, RangeCountM, countN, K, lda, ldb, ldc, alpha, beta);
+                B += countN * ldb;
+                C += countN;
+            }
+        } else {
+            if (!dispatch || !dispatch->HPackBKernel_TransposedB || !dispatch->HGemmKernel_TransposedPackedB) {
+                MLAS_THROW_EX(std::runtime_error, "hgemm does not have A x Transposed(B) kernels");
+            }
+            // 16N is the smallest pack unit.
+            const size_t StrideK = std::min(K, size_t(MLAS_HGEMM_STRIDEK));
+            const size_t StrideN = buffer_size / StrideK & (~15);  // >= MLAS_HGEMM_STRIDEN
+            for (size_t n = 0, countN; n < RangeCountN; n += countN) {
+                countN = std::min(StrideN, RangeCountN - n);
+                const MLAS_FP16* a = A;
+                const MLAS_FP16* b = B;
+                MLAS_FP16* c = C;
+                for (size_t k = 0, countK; k < K; k += countK) {
+                    countK = std::min(StrideK, K - k);
+                    dispatch->HPackBKernel_TransposedB(b, PackedB, countN, countK, ldb);
+                    const MLAS_FP16* aa = a;
+                    MLAS_FP16* cc = c;
+                    for (size_t m = 0, countM; m < RangeCountM; m += countM) {
+                        countM = std::min(StrideM, RangeCountM - m);
+                        // First K iteration, beta is applied to the whole C. In rest K iterations, use add mode.
+                        dispatch->HGemmKernel_TransposedPackedB(
+                            aa, PackedB, cc, countM, countN, countK, lda, ldc, alpha, k == 0 ? beta : beta_add.val);
+                        aa += countM * lda;
+                        cc += countM * ldc;
+                    }
+                    a += countK;
+                    b += countK;
+                }
+                B += countN * ldb;
+                C += countN;
+            }
+        }
+    } else {
+        MLAS_THROW_EX(std::runtime_error, "hgemm currently only supports A x Transpose(B)");
+    }
+}
+
+void
+MLASCALL
+MlasGemmBatch(
+    CBLAS_TRANSPOSE TransA,
+    CBLAS_TRANSPOSE TransB,
+    size_t M,
+    size_t N,
+    size_t K,
+    const MLAS_HGEMM_DATA_PARAMS* Data,
+    size_t BatchSize,
+    MLAS_THREADPOOL* ThreadPool
+) {
+    if (!ThreadPool) {
+        for (size_t gemm_i = 0; gemm_i < BatchSize; gemm_i++) {
+            HGemmOperation(TransA, TransB, K, &Data[gemm_i], 0, M, 0, N);
+        }
+        return;
+    }
+
+    const double Complexity = double(M) * double(N) * double(K) * double(BatchSize);
+    ptrdiff_t TargetThreadCount;
+
+    if (Complexity < double(MLAS_HGEMM_THREAD_COMPLEXITY) * GetMlasPlatform().MaximumThreadCount) {
+        TargetThreadCount = ptrdiff_t(Complexity / double(MLAS_HGEMM_THREAD_COMPLEXITY)) + 1;
+    } else {
+        TargetThreadCount = GetMlasPlatform().MaximumThreadCount;
+    }
+
+    ptrdiff_t MaximumThreadCount = MlasGetMaximumThreadCount(ThreadPool);
+    if (TargetThreadCount >= MaximumThreadCount) {
+        TargetThreadCount = MaximumThreadCount;
+    }
+
+    // Segment the operation across multiple threads.
+ + ptrdiff_t ThreadsPerGemm = TargetThreadCount / BatchSize; + if (ThreadsPerGemm < 1) { + ThreadsPerGemm = 1; + } + + constexpr size_t StrideM = 128; + + size_t nc = N; + if (ThreadsPerGemm > 1) { + // more than one thread per GEMM + + const size_t BlockedM = MlasDivRoundup(M, StrideM); + const size_t max_nc = MlasDivRoundup(N * BlockedM, ThreadsPerGemm); + if (max_nc < nc) { + nc = std::min( + nc, MlasDivRoundup(max_nc, MLAS_HGEMM_STRIDEN_THREAD_ALIGN) * MLAS_HGEMM_STRIDEN_THREAD_ALIGN); + } + } + const size_t StrideN = nc; + + const size_t ThreadCountM = MlasDivRoundup(M, StrideM); + const size_t ThreadCountN = MlasDivRoundup(N, StrideN); + ThreadsPerGemm = ThreadCountM * ThreadCountN; + + MlasTrySimpleParallel(ThreadPool, ThreadsPerGemm * static_cast(BatchSize), [&](ptrdiff_t tid) { + const auto gemm_i = tid / ThreadsPerGemm; + const auto blk_i = tid % ThreadsPerGemm; + + const ptrdiff_t ThreadIdN = blk_i / ThreadCountM; + const ptrdiff_t ThreadIdM = blk_i % ThreadCountM; + + const size_t RangeStartM = ThreadIdM * StrideM; + const size_t RangeCountM = std::min(M - RangeStartM, (size_t)StrideM); + + const size_t RangeStartN = ThreadIdN * StrideN; + const size_t RangeCountN = std::min(N - RangeStartN, (size_t)StrideN); + + HGemmOperation(TransA, TransB, K, &Data[gemm_i], RangeStartM, RangeCountM, RangeStartN, RangeCountN); + }); +} const MLAS_HALFGEMM_DISPATCH MlasHalfGemmDispatchDefault = { MlasHalfGemmOperation, diff --git a/onnxruntime/core/mlas/lib/halfgemm.h b/onnxruntime/core/mlas/lib/halfgemm.h index 61e2fbb0afc6a..e280e6d40973f 100644 --- a/onnxruntime/core/mlas/lib/halfgemm.h +++ b/onnxruntime/core/mlas/lib/halfgemm.h @@ -513,3 +513,125 @@ MlasHalfGemmGetDispatch() return &MlasHalfGemmDispatchDefault; #endif } + +namespace hgemm_neon { + +void HPackB_TransposedB_Kernel( + const MLAS_FP16* B, + MLAS_FP16* PackedB, + size_t CountN, + size_t CountK, + size_t ldb +); + +void HGemm_TransposedB_Kernel( + const MLAS_FP16* A, + const MLAS_FP16* B, + MLAS_FP16* C, + size_t CountM, + size_t CountN, + size_t CountK, + size_t lda, + size_t ldb, + size_t ldc, + _mlas_fp16_ alpha, + _mlas_fp16_ beta +); + +void HGemm_TransposedPackedB_Kernel( + const MLAS_FP16* A, + const MLAS_FP16* PackedB, + MLAS_FP16* C, + size_t CountM, + size_t CountN, + size_t CountK, + size_t lda, + size_t ldc, + _mlas_fp16_ alpha, + _mlas_fp16_ beta +); + +} // namespace hgemm_neon + +struct MLAS_HGEMM_DISPATCH { + /** + * @brief Pack the B matrix segment. B is column-major. Elements from CountK rows x N columns are packed + * continuously in row-major. + * First pack CountK rows x 16 columns, then pack CountK rows x 8 columns. + * If there are < 8 columns left, pad the columns with 0. + * @param B the first element of the B matrix segment. Column major. + * @param[out] PackedB the first element of the packed B matrix segment. + * @param CountN the number of columns of B chunk. + * @param CountK the number of rows of B chunk. + */ + typedef void(HPackBKernel_TransposedB_Fn) ( + const MLAS_FP16* B, + MLAS_FP16* PackedB, + size_t CountN, + size_t CountK, + size_t ldb + ); + + HPackBKernel_TransposedB_Fn* HPackBKernel_TransposedB = nullptr; + + /** + * @brief C = alpha * A * Transpose(B) + beta * C. CountM <= 2. B is not packed. Used when M is small. + * + * @param A first row of the A matrix segment. Row major. + * @param B first column of the B matrix segment. Column major. + * @param[out] C first element of the output matrix segment. Row major. + * @param CountM the number of rows of A chunk. 
+ * @param CountN the number of columns of B chunk. + * @param CountK the number of columns of A chunk and the number of rows of B chunk. + * @param lda the leading dimension of A. + * @param ldb the leading dimension of B. + * @param ldc the leading dimension of C. + * @param alpha the alpha scalar value. + * @param beta the beta scalar value. + */ + typedef void(HGemmKernel_TransposedB_Fn)( + const MLAS_FP16* A, + const MLAS_FP16* B, + MLAS_FP16* C, + size_t CountM, + size_t CountN, + size_t CountK, + size_t lda, + size_t ldb, + size_t ldc, + _mlas_fp16_ alpha, + _mlas_fp16_ beta + ); + + HGemmKernel_TransposedB_Fn* HGemmKernel_TransposedB = nullptr; + + /** + * @brief C = alpha * A * Transpose(B) + beta * C. CountM <= 2. B has been packed using HPackBKernel_TransposedB_Fn. + * Use when M is large. + * + * @param A first row of the A matrix segment. Row major. + * @param PackedB first element of the packed B buffer. + * @param[out] C first element of the output matrix segment. Row major. + * @param CountM the number of rows of A chunk. + * @param CountN the number of columns of B chunk. + * @param CountK the number of columns of A chunk and the number of rows of B chunk. + * @param lda the leading dimension of A. + * @param ldc the leading dimension of C. + * @param alpha the alpha scalar value. + * @param beta the beta scalar value. + */ + typedef void(HGemmKernel_TransposedPackedB_Fn)( + const MLAS_FP16* A, + const MLAS_FP16* PackedB, + MLAS_FP16* C, + size_t CountM, + size_t CountN, + size_t CountK, + size_t lda, + size_t ldc, + _mlas_fp16_ alpha, + _mlas_fp16_ beta + ); + + HGemmKernel_TransposedPackedB_Fn* HGemmKernel_TransposedPackedB = nullptr; +}; diff --git a/onnxruntime/core/mlas/lib/halfgemm_kernel_neon_fp16.cpp b/onnxruntime/core/mlas/lib/halfgemm_kernel_neon_fp16.cpp new file mode 100644 index 0000000000000..02ce38fcb21d6 --- /dev/null +++ b/onnxruntime/core/mlas/lib/halfgemm_kernel_neon_fp16.cpp @@ -0,0 +1,1572 @@ +/*++ + +Copyright (c) Microsoft Corporation. All rights reserved. + +Licensed under the MIT License. + +Module Name: + + halfgemm_kernel_neon_fp16.cpp + +Abstract: + + This module implements half precision GEMM kernel for neon. 
+ +--*/ + +#include + +#include "halfgemm.h" +#include "fp16_common.h" + +namespace hgemm_neon { + +void HPackB_TransposedB_Kernel( + const MLAS_FP16* B, + MLAS_FP16* PackedB, + size_t CountN, + size_t CountK, + size_t ldb +) { + const _mlas_fp16_* B_data = reinterpret_cast(B); + _mlas_fp16_* PackedB_data = reinterpret_cast<_mlas_fp16_*>(PackedB); + + for (; CountN >= 16; CountN -= 16, B_data += 16 * ldb) { + const _mlas_fp16_* b = B_data; + size_t k = CountK; + constexpr size_t step = 8 * 16; // pack 8 * 16 + for (; k >= 8; k -= 8, b += 8, PackedB_data += step) { + float16x8_t v0 = MlasLoadFloat16x8(b); + float16x8_t v1 = MlasLoadFloat16x8(b + ldb); + float16x8_t v2 = MlasLoadFloat16x8(b + 2 * ldb); + float16x8_t v3 = MlasLoadFloat16x8(b + 3 * ldb); + float16x8_t v4 = MlasLoadFloat16x8(b + 4 * ldb); + float16x8_t v5 = MlasLoadFloat16x8(b + 5 * ldb); + float16x8_t v6 = MlasLoadFloat16x8(b + 6 * ldb); + float16x8_t v7 = MlasLoadFloat16x8(b + 7 * ldb); + float16x8_t v8 = MlasLoadFloat16x8(b + 8 * ldb); + float16x8_t v9 = MlasLoadFloat16x8(b + 9 * ldb); + float16x8_t vA = MlasLoadFloat16x8(b + 10 * ldb); + float16x8_t vB = MlasLoadFloat16x8(b + 11 * ldb); + float16x8_t vC = MlasLoadFloat16x8(b + 12 * ldb); + float16x8_t vD = MlasLoadFloat16x8(b + 13 * ldb); + float16x8_t vE = MlasLoadFloat16x8(b + 14 * ldb); + float16x8_t vF = MlasLoadFloat16x8(b + 15 * ldb); + Transpose8x8(v0, v1, v2, v3, v4, v5, v6, v7); + Transpose8x8(v8, v9, vA, vB, vC, vD, vE, vF); + + MlasStoreFloat16x8(PackedB_data, v0); + MlasStoreFloat16x8(PackedB_data + 8, v8); + MlasStoreFloat16x8(PackedB_data + 16, v1); + MlasStoreFloat16x8(PackedB_data + 24, v9); + MlasStoreFloat16x8(PackedB_data + 32, v2); + MlasStoreFloat16x8(PackedB_data + 40, vA); + MlasStoreFloat16x8(PackedB_data + 48, v3); + MlasStoreFloat16x8(PackedB_data + 56, vB); + MlasStoreFloat16x8(PackedB_data + 64, v4); + MlasStoreFloat16x8(PackedB_data + 72, vC); + MlasStoreFloat16x8(PackedB_data + 80, v5); + MlasStoreFloat16x8(PackedB_data + 88, vD); + MlasStoreFloat16x8(PackedB_data + 96, v6); + MlasStoreFloat16x8(PackedB_data + 104, vE); + MlasStoreFloat16x8(PackedB_data + 112, v7); + MlasStoreFloat16x8(PackedB_data + 120, vF); + } + + if (k & 4) { + float16x4_t v0 = MlasLoadFloat16x4(b); + float16x4_t v1 = MlasLoadFloat16x4(b + ldb); + float16x4_t v2 = MlasLoadFloat16x4(b + 2 * ldb); + float16x4_t v3 = MlasLoadFloat16x4(b + 3 * ldb); + float16x4_t v4 = MlasLoadFloat16x4(b + 4 * ldb); + float16x4_t v5 = MlasLoadFloat16x4(b + 5 * ldb); + float16x4_t v6 = MlasLoadFloat16x4(b + 6 * ldb); + float16x4_t v7 = MlasLoadFloat16x4(b + 7 * ldb); + float16x4_t v8 = MlasLoadFloat16x4(b + 8 * ldb); + float16x4_t v9 = MlasLoadFloat16x4(b + 9 * ldb); + float16x4_t vA = MlasLoadFloat16x4(b + 10 * ldb); + float16x4_t vB = MlasLoadFloat16x4(b + 11 * ldb); + float16x4_t vC = MlasLoadFloat16x4(b + 12 * ldb); + float16x4_t vD = MlasLoadFloat16x4(b + 13 * ldb); + float16x4_t vE = MlasLoadFloat16x4(b + 14 * ldb); + float16x4_t vF = MlasLoadFloat16x4(b + 15 * ldb); + Transpose4x4(v0, v1, v2, v3); + Transpose4x4(v4, v5, v6, v7); + Transpose4x4(v8, v9, vA, vB); + Transpose4x4(vC, vD, vE, vF); + MlasStoreFloat16x4(PackedB_data, v0); + MlasStoreFloat16x4(PackedB_data + 4, v4); + MlasStoreFloat16x4(PackedB_data + 8, v8); + MlasStoreFloat16x4(PackedB_data + 12, vC); + MlasStoreFloat16x4(PackedB_data + 16, v1); + MlasStoreFloat16x4(PackedB_data + 20, v5); + MlasStoreFloat16x4(PackedB_data + 24, v9); + MlasStoreFloat16x4(PackedB_data + 28, vD); + MlasStoreFloat16x4(PackedB_data + 32, v2); + 
MlasStoreFloat16x4(PackedB_data + 36, v6); + MlasStoreFloat16x4(PackedB_data + 40, vA); + MlasStoreFloat16x4(PackedB_data + 44, vE); + MlasStoreFloat16x4(PackedB_data + 48, v3); + MlasStoreFloat16x4(PackedB_data + 52, v7); + MlasStoreFloat16x4(PackedB_data + 56, vB); + MlasStoreFloat16x4(PackedB_data + 60, vF); + + k -= 4, b += 4, PackedB_data += 4 * 16; + } + + if (k > 0) { + float16x4_t v0 = MlasLoadPartialFloat16x4(b, k); + float16x4_t v1 = MlasLoadPartialFloat16x4(b + ldb, k); + float16x4_t v2 = MlasLoadPartialFloat16x4(b + 2 * ldb, k); + float16x4_t v3 = MlasLoadPartialFloat16x4(b + 3 * ldb, k); + float16x4_t v4 = MlasLoadPartialFloat16x4(b + 4 * ldb, k); + float16x4_t v5 = MlasLoadPartialFloat16x4(b + 5 * ldb, k); + float16x4_t v6 = MlasLoadPartialFloat16x4(b + 6 * ldb, k); + float16x4_t v7 = MlasLoadPartialFloat16x4(b + 7 * ldb, k); + float16x4_t v8 = MlasLoadPartialFloat16x4(b + 8 * ldb, k); + float16x4_t v9 = MlasLoadPartialFloat16x4(b + 9 * ldb, k); + float16x4_t vA = MlasLoadPartialFloat16x4(b + 10 * ldb, k); + float16x4_t vB = MlasLoadPartialFloat16x4(b + 11 * ldb, k); + float16x4_t vC = MlasLoadPartialFloat16x4(b + 12 * ldb, k); + float16x4_t vD = MlasLoadPartialFloat16x4(b + 13 * ldb, k); + float16x4_t vE = MlasLoadPartialFloat16x4(b + 14 * ldb, k); + float16x4_t vF = MlasLoadPartialFloat16x4(b + 15 * ldb, k); + Transpose4x4(v0, v1, v2, v3); + Transpose4x4(v4, v5, v6, v7); + Transpose4x4(v8, v9, vA, vB); + Transpose4x4(vC, vD, vE, vF); + MlasStoreFloat16x4(PackedB_data, v0); + MlasStoreFloat16x4(PackedB_data + 4, v4); + MlasStoreFloat16x4(PackedB_data + 8, v8); + MlasStoreFloat16x4(PackedB_data + 12, vC); + if (k > 1) { + MlasStoreFloat16x4(PackedB_data + 16, v1); + MlasStoreFloat16x4(PackedB_data + 20, v5); + MlasStoreFloat16x4(PackedB_data + 24, v9); + MlasStoreFloat16x4(PackedB_data + 28, vD); + } + if (k > 2) { + MlasStoreFloat16x4(PackedB_data + 32, v2); + MlasStoreFloat16x4(PackedB_data + 36, v6); + MlasStoreFloat16x4(PackedB_data + 40, vA); + MlasStoreFloat16x4(PackedB_data + 44, vE); + } + + PackedB_data += k * 16; + } + } + + if (CountN & 8) { + const _mlas_fp16_* b = B_data; + size_t k = CountK; + constexpr size_t step = 8 * 8; // pack 8 * 8 + for (; k >= 8; k -= 8, b += 8, PackedB_data += step) { + float16x8_t v0 = MlasLoadFloat16x8(b); + float16x8_t v1 = MlasLoadFloat16x8(b + ldb); + float16x8_t v2 = MlasLoadFloat16x8(b + 2 * ldb); + float16x8_t v3 = MlasLoadFloat16x8(b + 3 * ldb); + float16x8_t v4 = MlasLoadFloat16x8(b + 4 * ldb); + float16x8_t v5 = MlasLoadFloat16x8(b + 5 * ldb); + float16x8_t v6 = MlasLoadFloat16x8(b + 6 * ldb); + float16x8_t v7 = MlasLoadFloat16x8(b + 7 * ldb); + Transpose8x8(v0, v1, v2, v3, v4, v5, v6, v7); + + MlasStoreFloat16x8(PackedB_data, v0); + MlasStoreFloat16x8(PackedB_data + 8, v1); + MlasStoreFloat16x8(PackedB_data + 16, v2); + MlasStoreFloat16x8(PackedB_data + 24, v3); + MlasStoreFloat16x8(PackedB_data + 32, v4); + MlasStoreFloat16x8(PackedB_data + 40, v5); + MlasStoreFloat16x8(PackedB_data + 48, v6); + MlasStoreFloat16x8(PackedB_data + 56, v7); + } + + if (k & 4) { + float16x4_t v0 = MlasLoadFloat16x4(b); + float16x4_t v1 = MlasLoadFloat16x4(b + ldb); + float16x4_t v2 = MlasLoadFloat16x4(b + 2 * ldb); + float16x4_t v3 = MlasLoadFloat16x4(b + 3 * ldb); + float16x4_t v4 = MlasLoadFloat16x4(b + 4 * ldb); + float16x4_t v5 = MlasLoadFloat16x4(b + 5 * ldb); + float16x4_t v6 = MlasLoadFloat16x4(b + 6 * ldb); + float16x4_t v7 = MlasLoadFloat16x4(b + 7 * ldb); + Transpose4x4(v0, v1, v2, v3); + Transpose4x4(v4, v5, v6, v7); + 
MlasStoreFloat16x4(PackedB_data, v0); + MlasStoreFloat16x4(PackedB_data + 4, v4); + MlasStoreFloat16x4(PackedB_data + 8, v1); + MlasStoreFloat16x4(PackedB_data + 12, v5); + MlasStoreFloat16x4(PackedB_data + 16, v2); + MlasStoreFloat16x4(PackedB_data + 20, v6); + MlasStoreFloat16x4(PackedB_data + 24, v3); + MlasStoreFloat16x4(PackedB_data + 28, v7); + k -= 4, b += 4, PackedB_data += 4 * 8; + } + + if (k > 0) { + float16x4_t v0 = MlasLoadPartialFloat16x4(b, k); + float16x4_t v1 = MlasLoadPartialFloat16x4(b + ldb, k); + float16x4_t v2 = MlasLoadPartialFloat16x4(b + 2 * ldb, k); + float16x4_t v3 = MlasLoadPartialFloat16x4(b + 3 * ldb, k); + float16x4_t v4 = MlasLoadPartialFloat16x4(b + 4 * ldb, k); + float16x4_t v5 = MlasLoadPartialFloat16x4(b + 5 * ldb, k); + float16x4_t v6 = MlasLoadPartialFloat16x4(b + 6 * ldb, k); + float16x4_t v7 = MlasLoadPartialFloat16x4(b + 7 * ldb, k); + Transpose4x4(v0, v1, v2, v3); + Transpose4x4(v4, v5, v6, v7); + MlasStoreFloat16x4(PackedB_data, v0); + MlasStoreFloat16x4(PackedB_data + 4, v4); + if (k > 1) { + MlasStoreFloat16x4(PackedB_data + 8, v1); + MlasStoreFloat16x4(PackedB_data + 12, v5); + } + if (k > 2) { + MlasStoreFloat16x4(PackedB_data + 16, v2); + MlasStoreFloat16x4(PackedB_data + 20, v6); + } + + PackedB_data += k * 8; + } + + B_data += 8 * ldb; + CountN -= 8; + } + + if (CountN > 0) { + const _mlas_fp16_* b = B_data; + size_t k = CountK; + constexpr size_t step = 8 * 8; // pack extended 8 * 8 + for (; k >= 8; k -= 8, b += 8, PackedB_data += step) { + float16x8_t v[8]; + size_t i = 0; + for (; i < CountN; ++i) { + v[i] = MlasLoadFloat16x8(b + i * ldb); + } + for (; i < 8; ++i) { + v[i] = MlasZeroFloat16x8(); + } + Transpose8x8(v[0], v[1], v[2], v[3], v[4], v[5], v[6], v[7]); + MlasStoreFloat16x8(PackedB_data, v[0]); + MlasStoreFloat16x8(PackedB_data + 8, v[1]); + MlasStoreFloat16x8(PackedB_data + 16, v[2]); + MlasStoreFloat16x8(PackedB_data + 24, v[3]); + MlasStoreFloat16x8(PackedB_data + 32, v[4]); + MlasStoreFloat16x8(PackedB_data + 40, v[5]); + MlasStoreFloat16x8(PackedB_data + 48, v[6]); + MlasStoreFloat16x8(PackedB_data + 56, v[7]); + } + + if (k & 4) { + float16x4_t v[8]; + size_t i = 0; + for (; i < CountN; ++i) { + v[i] = MlasLoadFloat16x4(b + i * ldb); + } + for (; i < 8; ++i) { + v[i] = MlasZeroFloat16x4(); + } + Transpose4x4(v[0], v[1], v[2], v[3]); + Transpose4x4(v[4], v[5], v[6], v[7]); + MlasStoreFloat16x4(PackedB_data, v[0]); + MlasStoreFloat16x4(PackedB_data + 4, v[4]); + MlasStoreFloat16x4(PackedB_data + 8, v[1]); + MlasStoreFloat16x4(PackedB_data + 12, v[5]); + MlasStoreFloat16x4(PackedB_data + 16, v[2]); + MlasStoreFloat16x4(PackedB_data + 20, v[6]); + MlasStoreFloat16x4(PackedB_data + 24, v[3]); + MlasStoreFloat16x4(PackedB_data + 28, v[7]); + k -= 4, b += 4, PackedB_data += 4 * 8; + } + + if (k > 0) { + float16x4_t v[8]; + size_t i = 0; + for (; i < CountN; ++i) { + v[i] = MlasLoadPartialFloat16x4(b + i * ldb, k); + } + for (; i < 8; ++i) { + v[i] = MlasZeroFloat16x4(); + } + Transpose4x4(v[0], v[1], v[2], v[3]); + Transpose4x4(v[4], v[5], v[6], v[7]); + MlasStoreFloat16x4(PackedB_data, v[0]); + MlasStoreFloat16x4(PackedB_data + 4, v[4]); + if (k > 1) { + MlasStoreFloat16x4(PackedB_data + 8, v[1]); + MlasStoreFloat16x4(PackedB_data + 12, v[5]); + } + if (k > 2) { + MlasStoreFloat16x4(PackedB_data + 16, v[2]); + MlasStoreFloat16x4(PackedB_data + 20, v[6]); + } + } + } +} + +MLAS_FORCEINLINE +float16x8_t addq_f16x4(float16x8_t v0, float16x8_t v1, float16x8_t v2, float16x8_t v3) { + v0 = vaddq_f16(v0, v1); + v2 = vaddq_f16(v2, v3); 
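+ // Final combine of the pairwise tree reduction: (v0 + v1) + (v2 + v3) keeps the add dependency chain short.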
+ v0 = vaddq_f16(v0, v2); + return v0; +} + +MLAS_FORCEINLINE +float16x8_t addq_f16x8(float16x8_t v0, float16x8_t v1, float16x8_t v2, float16x8_t v3, + float16x8_t v4, float16x8_t v5, float16x8_t v6, float16x8_t v7) { + return vaddq_f16(addq_f16x4(v0, v1, v2, v3), addq_f16x4(v4, v5, v6, v7)); +} + +MLAS_FORCEINLINE +float16x8_t maq_lane_f16_accu(float16x8_t accu0, float16x8_t v0, float16x8_t v1, float16x8_t v2, float16x8_t v3, + float16x4_t a0) { + accu0 = vfmaq_lane_f16(accu0, v0, a0, 0); + accu0 = vfmaq_lane_f16(accu0, v1, a0, 1); + accu0 = vfmaq_lane_f16(accu0, v2, a0, 2); + accu0 = vfmaq_lane_f16(accu0, v3, a0, 3); + return accu0; +} + +MLAS_FORCEINLINE +float16x8_t maq_laneq_f16_accu(float16x8_t accu0, float16x8_t v0, float16x8_t v1, float16x8_t v2, float16x8_t v3, + float16x8_t v4, float16x8_t v5, float16x8_t v6, float16x8_t v7, float16x8_t a0) { + accu0 = vfmaq_laneq_f16(accu0, v0, a0, 0); + accu0 = vfmaq_laneq_f16(accu0, v1, a0, 1); + accu0 = vfmaq_laneq_f16(accu0, v2, a0, 2); + accu0 = vfmaq_laneq_f16(accu0, v3, a0, 3); + accu0 = vfmaq_laneq_f16(accu0, v4, a0, 4); + accu0 = vfmaq_laneq_f16(accu0, v5, a0, 5); + accu0 = vfmaq_laneq_f16(accu0, v6, a0, 6); + accu0 = vfmaq_laneq_f16(accu0, v7, a0, 7); + return accu0; +} + +MLAS_FORCEINLINE +float16x4_t ma_lane_f16_accu(float16x4_t accu, float16x4_t v0, float16x4_t v1, float16x4_t v2, float16x4_t v3, + float16x4_t a0) { + accu = vfma_lane_f16(accu, v0, a0, 0); + accu = vfma_lane_f16(accu, v1, a0, 1); + accu = vfma_lane_f16(accu, v2, a0, 2); + accu = vfma_lane_f16(accu, v3, a0, 3); + return accu; +} + +template <int beta_behavior> // 0: beta == 0.0f16, 1: beta == 1.0f16, 2: beta != 0.0f16 && beta != 1.0f16 +void HGemm_TransposedB_Kernel_M1( + const _mlas_fp16_* A_data, + const _mlas_fp16_* B_data, + _mlas_fp16_* C_data, + size_t CountN, + size_t CountK, + size_t ldb, + _mlas_fp16_ alpha, + _mlas_fp16_ beta +) { + for (; CountN >= 8; CountN -= 8, B_data += 8 * ldb, C_data += 8) { + const auto* a = A_data; + const auto* b = B_data; + size_t k = CountK; + float16x8_t accu0 = MlasZeroFloat16x8(); + float16x8_t accu1 = MlasZeroFloat16x8(); + float16x8_t accu2 = MlasZeroFloat16x8(); + float16x8_t accu3 = MlasZeroFloat16x8(); + float16x8_t accu4 = MlasZeroFloat16x8(); + float16x8_t accu5 = MlasZeroFloat16x8(); + float16x8_t accu6 = MlasZeroFloat16x8(); + float16x8_t accu7 = MlasZeroFloat16x8(); + for (; k >= 8; k -= 8, a += 8, b += 8) { + float16x8_t b0 = MlasLoadFloat16x8(b); + float16x8_t b1 = MlasLoadFloat16x8(b + ldb); + float16x8_t b2 = MlasLoadFloat16x8(b + 2 * ldb); + float16x8_t b3 = MlasLoadFloat16x8(b + 3 * ldb); + float16x8_t b4 = MlasLoadFloat16x8(b + 4 * ldb); + float16x8_t b5 = MlasLoadFloat16x8(b + 5 * ldb); + float16x8_t b6 = MlasLoadFloat16x8(b + 6 * ldb); + float16x8_t b7 = MlasLoadFloat16x8(b + 7 * ldb); + float16x8_t a0 = MlasLoadFloat16x8(a); + accu0 = vfmaq_f16(accu0, b0, a0); + accu1 = vfmaq_f16(accu1, b1, a0); + accu2 = vfmaq_f16(accu2, b2, a0); + accu3 = vfmaq_f16(accu3, b3, a0); + accu4 = vfmaq_f16(accu4, b4, a0); + accu5 = vfmaq_f16(accu5, b5, a0); + accu6 = vfmaq_f16(accu6, b6, a0); + accu7 = vfmaq_f16(accu7, b7, a0); + } + Transpose8x8(accu0, accu1, accu2, accu3, accu4, accu5, accu6, accu7); + accu0 = addq_f16x8(accu0, accu1, accu2, accu3, accu4, accu5, accu6, accu7); // accumulator of 8 columns + + if (k & 4) { + float16x4_t b0 = MlasLoadFloat16x4(b); + float16x4_t b1 = MlasLoadFloat16x4(b + ldb); + float16x4_t b2 = MlasLoadFloat16x4(b + 2 * ldb); + float16x4_t b3 = MlasLoadFloat16x4(b + 3 * ldb); + float16x4_t b4 =
MlasLoadFloat16x4(b + 4 * ldb); + float16x4_t b5 = MlasLoadFloat16x4(b + 5 * ldb); + float16x4_t b6 = MlasLoadFloat16x4(b + 6 * ldb); + float16x4_t b7 = MlasLoadFloat16x4(b + 7 * ldb); + Transpose4x4(b0, b1, b2, b3); + Transpose4x4(b4, b5, b6, b7); + float16x8_t v0 = vcombine_f16(b0, b4); + float16x8_t v1 = vcombine_f16(b1, b5); + float16x8_t v2 = vcombine_f16(b2, b6); + float16x8_t v3 = vcombine_f16(b3, b7); + float16x4_t a0 = MlasLoadFloat16x4(a); + accu0 = maq_lane_f16_accu(accu0, v0, v1, v2, v3, a0); + k -= 4, a += 4, b += 4; + } + + if (k > 0) { + float16x4_t b0 = MlasLoadPartialFloat16x4(b, k); + float16x4_t b1 = MlasLoadPartialFloat16x4(b + ldb, k); + float16x4_t b2 = MlasLoadPartialFloat16x4(b + 2 * ldb, k); + float16x4_t b3 = MlasLoadPartialFloat16x4(b + 3 * ldb, k); + float16x4_t b4 = MlasLoadPartialFloat16x4(b + 4 * ldb, k); + float16x4_t b5 = MlasLoadPartialFloat16x4(b + 5 * ldb, k); + float16x4_t b6 = MlasLoadPartialFloat16x4(b + 6 * ldb, k); + float16x4_t b7 = MlasLoadPartialFloat16x4(b + 7 * ldb, k); + Transpose4x4(b0, b1, b2, b3); + Transpose4x4(b4, b5, b6, b7); + float16x8_t v0 = vcombine_f16(b0, b4), v1, v2; + float16x4_t a0 = MlasLoadPartialFloat16x4(a, k); + accu0 = vfmaq_lane_f16(accu0, v0, a0, 0); + if (k > 1) { + v1 = vcombine_f16(b1, b5); + accu0 = vfmaq_lane_f16(accu0, v1, a0, 1); + } + if (k > 2) { + v2 = vcombine_f16(b2, b6); + accu0 = vfmaq_lane_f16(accu0, v2, a0, 2); + } + } + + if constexpr (beta_behavior == 1) { + float16x8_t c = MlasLoadFloat16x8(C_data); + float16x8_t alpha_v = MlasBroadcastFloat16x8(alpha); + accu0 = vfmaq_f16(c, accu0, alpha_v); + MlasStoreFloat16x8(C_data, accu0); + } else if constexpr (beta_behavior == 2) { + float16x8_t c = MlasLoadFloat16x8(C_data); + float16x8_t alpha_v = MlasBroadcastFloat16x8(alpha); + float16x8_t beta_v = MlasBroadcastFloat16x8(beta); + accu0 = vfmaq_f16(vmulq_f16(c, beta_v), accu0, alpha_v); + MlasStoreFloat16x8(C_data, accu0); + } else { + float16x8_t alpha_v = MlasBroadcastFloat16x8(alpha); + accu0 = vmulq_f16(accu0, alpha_v); + MlasStoreFloat16x8(C_data, accu0); + } + } + + if (CountN & 4) { + const auto* a = A_data; + const auto* b = B_data; + size_t k = CountK; + float16x8_t accu0 = MlasZeroFloat16x8(); + float16x8_t accu1 = MlasZeroFloat16x8(); + float16x8_t accu2 = MlasZeroFloat16x8(); + float16x8_t accu3 = MlasZeroFloat16x8(); + for (; k >= 8; k -= 8, a += 8, b += 8) { + float16x8_t b0 = MlasLoadFloat16x8(b); + float16x8_t b1 = MlasLoadFloat16x8(b + ldb); + float16x8_t b2 = MlasLoadFloat16x8(b + 2 * ldb); + float16x8_t b3 = MlasLoadFloat16x8(b + 3 * ldb); + float16x8_t a0 = MlasLoadFloat16x8(a); + accu0 = vfmaq_f16(accu0, b0, a0); + accu1 = vfmaq_f16(accu1, b1, a0); + accu2 = vfmaq_f16(accu2, b2, a0); + accu3 = vfmaq_f16(accu3, b3, a0); + } + Transpose4x8(accu0, accu1, accu2, accu3); + accu0 = addq_f16x4(accu0, accu1, accu2, accu3); // accumulator of 4 columns + float16x4_t accu = vadd_f16(vget_low_f16(accu0), vget_high_f16(accu0)); + + if (k & 4) { + float16x4_t b0 = MlasLoadFloat16x4(b); + float16x4_t b1 = MlasLoadFloat16x4(b + ldb); + float16x4_t b2 = MlasLoadFloat16x4(b + 2 * ldb); + float16x4_t b3 = MlasLoadFloat16x4(b + 3 * ldb); + Transpose4x4(b0, b1, b2, b3); + float16x4_t a0 = MlasLoadFloat16x4(a); + accu = ma_lane_f16_accu(accu, b0, b1, b2, b3, a0); + k -= 4, a += 4, b += 4; + } + + if (k > 0) { + float16x4_t b0 = MlasLoadPartialFloat16x4(b, k); + float16x4_t b1 = MlasLoadPartialFloat16x4(b + ldb, k); + float16x4_t b2 = MlasLoadPartialFloat16x4(b + 2 * ldb, k); + float16x4_t b3 = 
MlasLoadPartialFloat16x4(b + 3 * ldb, k); + Transpose4x4(b0, b1, b2, b3); + float16x4_t a0 = MlasLoadPartialFloat16x4(a, k); + accu = vfma_lane_f16(accu, b0, a0, 0); + if (k > 1) { + accu = vfma_lane_f16(accu, b1, a0, 1); + } + if (k > 2) { + accu = vfma_lane_f16(accu, b2, a0, 2); + } + } + + if constexpr (beta_behavior == 1) { + float16x4_t c = MlasLoadFloat16x4(C_data); + float16x4_t alpha_v = MlasBroadcastFloat16x4(alpha); + accu = vfma_f16(c, accu, alpha_v); + MlasStoreFloat16x4(C_data, accu); + } else if constexpr (beta_behavior == 2) { + float16x4_t c = MlasLoadFloat16x4(C_data); + float16x4_t alpha_v = MlasBroadcastFloat16x4(alpha); + float16x4_t beta_v = MlasBroadcastFloat16x4(beta); + accu = vfma_f16(vmul_f16(c, beta_v), accu, alpha_v); + MlasStoreFloat16x4(C_data, accu); + } else { + float16x4_t alpha_v = MlasBroadcastFloat16x4(alpha); + accu = vmul_f16(accu, alpha_v); + MlasStoreFloat16x4(C_data, accu); + } + + CountN -= 4, B_data += 4 * ldb, C_data += 4; + } + + if (CountN > 0) { + const auto* a = A_data; + const auto* b = B_data; + size_t k = CountK; + float16x8_t accus[4]; + size_t i = 0; + for (i = 0; i < 4; ++i) { + accus[i] = MlasZeroFloat16x8(); + } + for (; k >= 8; k -= 8, a += 8, b += 8) { + float16x8_t a0 = MlasLoadFloat16x8(a); + for (i = 0; i < CountN; ++i) { + accus[i] = vfmaq_f16(accus[i], MlasLoadFloat16x8(b + i * ldb), a0); + } + } + Transpose4x8(accus[0], accus[1], accus[2], accus[3]); + float16x8_t accu0 = addq_f16x4(accus[0], accus[1], accus[2], accus[3]); // accumulator of 4 columns + float16x4_t accu = vadd_f16(vget_low_f16(accu0), vget_high_f16(accu0)); + + if (k & 4) { + float16x4_t bs[4]; + for (i = 0; i < CountN; ++i) { + bs[i] = MlasLoadFloat16x4(b + i * ldb); + } + for (; i < 4; ++i) { + bs[i] = MlasZeroFloat16x4(); + } + Transpose4x4(bs[0], bs[1], bs[2], bs[3]); + float16x4_t a0 = MlasLoadFloat16x4(a); + accu = ma_lane_f16_accu(accu, bs[0], bs[1], bs[2], bs[3], a0); + k -= 4, a += 4, b += 4; + } + + if (k > 0) { + float16x4_t bs[4]; + for (i = 0; i < CountN; ++i) { + bs[i] = MlasLoadPartialFloat16x4(b + i * ldb, k); + } + for (; i < 4; ++i) { + bs[i] = MlasZeroFloat16x4(); + } + Transpose4x4(bs[0], bs[1], bs[2], bs[3]); + float16x4_t a0 = MlasLoadPartialFloat16x4(a, k); + accu = vfma_lane_f16(accu, bs[0], a0, 0); + if (k > 1) { + accu = vfma_lane_f16(accu, bs[1], a0, 1); + } + if (k > 2) { + accu = vfma_lane_f16(accu, bs[2], a0, 2); + } + } + + if constexpr (beta_behavior == 1) { + float16x4_t c = MlasLoadPartialFloat16x4(C_data, CountN); + float16x4_t alpha_v = MlasBroadcastFloat16x4(alpha); + accu = vfma_f16(c, accu, alpha_v); + MlasStorePartialFloat16x4(C_data, accu, CountN); + } else if constexpr (beta_behavior == 2) { + float16x4_t c = MlasLoadPartialFloat16x4(C_data, CountN); + float16x4_t alpha_v = MlasBroadcastFloat16x4(alpha); + float16x4_t beta_v = MlasBroadcastFloat16x4(beta); + accu = vfma_f16(vmul_f16(c, beta_v), accu, alpha_v); + MlasStorePartialFloat16x4(C_data, accu, CountN); + } else { + float16x4_t alpha_v = MlasBroadcastFloat16x4(alpha); + accu = vmul_f16(accu, alpha_v); + MlasStorePartialFloat16x4(C_data, accu, CountN); + } + } +} + +template <int beta_behavior> // 0: beta == 0.0f16, 1: beta == 1.0f16, 2: beta != 0.0f16 && beta != 1.0f16 +void HGemm_TransposedB_Kernel_M2( + const _mlas_fp16_* A_data, + const _mlas_fp16_* B_data, + _mlas_fp16_* C_data, + size_t CountN, + size_t CountK, + size_t lda, + size_t ldb, + size_t ldc, + _mlas_fp16_ alpha, + _mlas_fp16_ beta +) { + for (; CountN >= 8; CountN -= 8, B_data += 8 * ldb, C_data += 8) { + const auto*
a = A_data; + const auto* b = B_data; + size_t k = CountK; + float16x8_t accu00 = MlasZeroFloat16x8(); + float16x8_t accu01 = MlasZeroFloat16x8(); + float16x8_t accu02 = MlasZeroFloat16x8(); + float16x8_t accu03 = MlasZeroFloat16x8(); + float16x8_t accu04 = MlasZeroFloat16x8(); + float16x8_t accu05 = MlasZeroFloat16x8(); + float16x8_t accu06 = MlasZeroFloat16x8(); + float16x8_t accu07 = MlasZeroFloat16x8(); + float16x8_t accu10 = MlasZeroFloat16x8(); + float16x8_t accu11 = MlasZeroFloat16x8(); + float16x8_t accu12 = MlasZeroFloat16x8(); + float16x8_t accu13 = MlasZeroFloat16x8(); + float16x8_t accu14 = MlasZeroFloat16x8(); + float16x8_t accu15 = MlasZeroFloat16x8(); + float16x8_t accu16 = MlasZeroFloat16x8(); + float16x8_t accu17 = MlasZeroFloat16x8(); + for (; k >= 8; k -= 8, a += 8, b += 8) { + float16x8_t b0 = MlasLoadFloat16x8(b); + float16x8_t b1 = MlasLoadFloat16x8(b + ldb); + float16x8_t b2 = MlasLoadFloat16x8(b + 2 * ldb); + float16x8_t b3 = MlasLoadFloat16x8(b + 3 * ldb); + float16x8_t b4 = MlasLoadFloat16x8(b + 4 * ldb); + float16x8_t b5 = MlasLoadFloat16x8(b + 5 * ldb); + float16x8_t b6 = MlasLoadFloat16x8(b + 6 * ldb); + float16x8_t b7 = MlasLoadFloat16x8(b + 7 * ldb); + float16x8_t a0 = MlasLoadFloat16x8(a); + float16x8_t a1 = MlasLoadFloat16x8(a + lda); + accu00 = vfmaq_f16(accu00, b0, a0); + accu01 = vfmaq_f16(accu01, b1, a0); + accu02 = vfmaq_f16(accu02, b2, a0); + accu03 = vfmaq_f16(accu03, b3, a0); + accu04 = vfmaq_f16(accu04, b4, a0); + accu05 = vfmaq_f16(accu05, b5, a0); + accu06 = vfmaq_f16(accu06, b6, a0); + accu07 = vfmaq_f16(accu07, b7, a0); + accu10 = vfmaq_f16(accu10, b0, a1); + accu11 = vfmaq_f16(accu11, b1, a1); + accu12 = vfmaq_f16(accu12, b2, a1); + accu13 = vfmaq_f16(accu13, b3, a1); + accu14 = vfmaq_f16(accu14, b4, a1); + accu15 = vfmaq_f16(accu15, b5, a1); + accu16 = vfmaq_f16(accu16, b6, a1); + accu17 = vfmaq_f16(accu17, b7, a1); + } + Transpose8x8(accu00, accu01, accu02, accu03, accu04, accu05, accu06, accu07); + Transpose8x8(accu10, accu11, accu12, accu13, accu14, accu15, accu16, accu17); + accu00 = addq_f16x8(accu00, accu01, accu02, accu03, accu04, accu05, accu06, accu07); + accu10 = addq_f16x8(accu10, accu11, accu12, accu13, accu14, accu15, accu16, accu17); + + if (k & 4) { + float16x4_t b0 = MlasLoadFloat16x4(b); + float16x4_t b1 = MlasLoadFloat16x4(b + ldb); + float16x4_t b2 = MlasLoadFloat16x4(b + 2 * ldb); + float16x4_t b3 = MlasLoadFloat16x4(b + 3 * ldb); + float16x4_t b4 = MlasLoadFloat16x4(b + 4 * ldb); + float16x4_t b5 = MlasLoadFloat16x4(b + 5 * ldb); + float16x4_t b6 = MlasLoadFloat16x4(b + 6 * ldb); + float16x4_t b7 = MlasLoadFloat16x4(b + 7 * ldb); + Transpose4x4(b0, b1, b2, b3); + Transpose4x4(b4, b5, b6, b7); + float16x8_t v0 = vcombine_f16(b0, b4); + float16x8_t v1 = vcombine_f16(b1, b5); + float16x8_t v2 = vcombine_f16(b2, b6); + float16x8_t v3 = vcombine_f16(b3, b7); + float16x4_t a0 = MlasLoadFloat16x4(a); + float16x4_t a1 = MlasLoadFloat16x4(a + lda); + accu00 = maq_lane_f16_accu(accu00, v0, v1, v2, v3, a0); + accu10 = maq_lane_f16_accu(accu10, v0, v1, v2, v3, a1); + k -= 4, a += 4, b += 4; + } + + if (k > 0) { + float16x4_t b0 = MlasLoadPartialFloat16x4(b, k); + float16x4_t b1 = MlasLoadPartialFloat16x4(b + ldb, k); + float16x4_t b2 = MlasLoadPartialFloat16x4(b + 2 * ldb, k); + float16x4_t b3 = MlasLoadPartialFloat16x4(b + 3 * ldb, k); + float16x4_t b4 = MlasLoadPartialFloat16x4(b + 4 * ldb, k); + float16x4_t b5 = MlasLoadPartialFloat16x4(b + 5 * ldb, k); + float16x4_t b6 = MlasLoadPartialFloat16x4(b + 6 * ldb, k); + float16x4_t b7 
= MlasLoadPartialFloat16x4(b + 7 * ldb, k); + Transpose4x4(b0, b1, b2, b3); + Transpose4x4(b4, b5, b6, b7); + float16x8_t v0 = vcombine_f16(b0, b4); + float16x4_t a0 = MlasLoadPartialFloat16x4(a, k); + float16x4_t a1 = MlasLoadPartialFloat16x4(a + lda, k); + accu00 = vfmaq_lane_f16(accu00, v0, a0, 0); + accu10 = vfmaq_lane_f16(accu10, v0, a1, 0); + if (k > 1) { + float16x8_t v1 = vcombine_f16(b1, b5); + accu00 = vfmaq_lane_f16(accu00, v1, a0, 1); + accu10 = vfmaq_lane_f16(accu10, v1, a1, 1); + } + if (k > 2) { + float16x8_t v2 = vcombine_f16(b2, b6); + accu00 = vfmaq_lane_f16(accu00, v2, a0, 2); + accu10 = vfmaq_lane_f16(accu10, v2, a1, 2); + } + } + + if constexpr (beta_behavior == 1) { + float16x8_t c0 = MlasLoadFloat16x8(C_data); + float16x8_t c1 = MlasLoadFloat16x8(C_data + ldc); + float16x8_t alpha_v = MlasBroadcastFloat16x8(alpha); + accu00 = vfmaq_f16(c0, accu00, alpha_v); + accu10 = vfmaq_f16(c1, accu10, alpha_v); + MlasStoreFloat16x8(C_data, accu00); + MlasStoreFloat16x8(C_data + ldc, accu10); + } else if constexpr (beta_behavior == 2) { + float16x8_t c0 = MlasLoadFloat16x8(C_data); + float16x8_t c1 = MlasLoadFloat16x8(C_data + ldc); + float16x8_t alpha_v = MlasBroadcastFloat16x8(alpha); + float16x8_t beta_v = MlasBroadcastFloat16x8(beta); + accu00 = vfmaq_f16(vmulq_f16(c0, beta_v), accu00, alpha_v); + accu10 = vfmaq_f16(vmulq_f16(c1, beta_v), accu10, alpha_v); + MlasStoreFloat16x8(C_data, accu00); + MlasStoreFloat16x8(C_data + ldc, accu10); + } else { + float16x8_t alpha_v = MlasBroadcastFloat16x8(alpha); + accu00 = vmulq_f16(accu00, alpha_v); + accu10 = vmulq_f16(accu10, alpha_v); + MlasStoreFloat16x8(C_data, accu00); + MlasStoreFloat16x8(C_data + ldc, accu10); + } + } + + if (CountN & 4) { + const auto* a = A_data; + const auto* b = B_data; + size_t k = CountK; + float16x8_t accu00 = MlasZeroFloat16x8(); + float16x8_t accu01 = MlasZeroFloat16x8(); + float16x8_t accu02 = MlasZeroFloat16x8(); + float16x8_t accu03 = MlasZeroFloat16x8(); + float16x8_t accu10 = MlasZeroFloat16x8(); + float16x8_t accu11 = MlasZeroFloat16x8(); + float16x8_t accu12 = MlasZeroFloat16x8(); + float16x8_t accu13 = MlasZeroFloat16x8(); + for (; k >= 8; k -= 8, a += 8, b += 8) { + float16x8_t b0 = MlasLoadFloat16x8(b); + float16x8_t b1 = MlasLoadFloat16x8(b + ldb); + float16x8_t b2 = MlasLoadFloat16x8(b + 2 * ldb); + float16x8_t b3 = MlasLoadFloat16x8(b + 3 * ldb); + float16x8_t a0 = MlasLoadFloat16x8(a); + float16x8_t a1 = MlasLoadFloat16x8(a + lda); + accu00 = vfmaq_f16(accu00, b0, a0); + accu01 = vfmaq_f16(accu01, b1, a0); + accu02 = vfmaq_f16(accu02, b2, a0); + accu03 = vfmaq_f16(accu03, b3, a0); + accu10 = vfmaq_f16(accu10, b0, a1); + accu11 = vfmaq_f16(accu11, b1, a1); + accu12 = vfmaq_f16(accu12, b2, a1); + accu13 = vfmaq_f16(accu13, b3, a1); + } + Transpose4x8(accu00, accu01, accu02, accu03); + Transpose4x8(accu10, accu11, accu12, accu13); + accu00 = addq_f16x4(accu00, accu01, accu02, accu03); + accu10 = addq_f16x4(accu10, accu11, accu12, accu13); + float16x4_t accu0 = vadd_f16(vget_low_f16(accu00), vget_high_f16(accu00)); + float16x4_t accu1 = vadd_f16(vget_low_f16(accu10), vget_high_f16(accu10)); + + if (k & 4) { + float16x4_t b0 = MlasLoadFloat16x4(b); + float16x4_t b1 = MlasLoadFloat16x4(b + ldb); + float16x4_t b2 = MlasLoadFloat16x4(b + 2 * ldb); + float16x4_t b3 = MlasLoadFloat16x4(b + 3 * ldb); + Transpose4x4(b0, b1, b2, b3); + float16x4_t a0 = MlasLoadFloat16x4(a); + float16x4_t a1 = MlasLoadFloat16x4(a + lda); + accu0 = ma_lane_f16_accu(accu0, b0, b1, b2, b3, a0); + accu1 = 
ma_lane_f16_accu(accu1, b0, b1, b2, b3, a1); + k -= 4, a += 4, b += 4; + } + + if (k > 0) { + float16x4_t b0 = MlasLoadPartialFloat16x4(b, k); + float16x4_t b1 = MlasLoadPartialFloat16x4(b + ldb, k); + float16x4_t b2 = MlasLoadPartialFloat16x4(b + 2 * ldb, k); + float16x4_t b3 = MlasLoadPartialFloat16x4(b + 3 * ldb, k); + Transpose4x4(b0, b1, b2, b3); + float16x4_t a0 = MlasLoadPartialFloat16x4(a, k); + float16x4_t a1 = MlasLoadPartialFloat16x4(a + lda, k); + accu0 = vfma_lane_f16(accu0, b0, a0, 0); + accu1 = vfma_lane_f16(accu1, b0, a1, 0); + if (k > 1) { + accu0 = vfma_lane_f16(accu0, b1, a0, 1); + accu1 = vfma_lane_f16(accu1, b1, a1, 1); + } + if (k > 2) { + accu0 = vfma_lane_f16(accu0, b2, a0, 2); + accu1 = vfma_lane_f16(accu1, b2, a1, 2); + } + } + + if constexpr (beta_behavior == 1) { + float16x4_t c0 = MlasLoadFloat16x4(C_data); + float16x4_t c1 = MlasLoadFloat16x4(C_data + ldc); + float16x4_t alpha_v = MlasBroadcastFloat16x4(alpha); + accu0 = vfma_f16(c0, accu0, alpha_v); + accu1 = vfma_f16(c1, accu1, alpha_v); + MlasStoreFloat16x4(C_data, accu0); + MlasStoreFloat16x4(C_data + ldc, accu1); + } else if constexpr (beta_behavior == 2) { + float16x4_t c0 = MlasLoadFloat16x4(C_data); + float16x4_t c1 = MlasLoadFloat16x4(C_data + ldc); + float16x4_t alpha_v = MlasBroadcastFloat16x4(alpha); + float16x4_t beta_v = MlasBroadcastFloat16x4(beta); + accu0 = vfma_f16(vmul_f16(c0, beta_v), accu0, alpha_v); + accu1 = vfma_f16(vmul_f16(c1, beta_v), accu1, alpha_v); + MlasStoreFloat16x4(C_data, accu0); + MlasStoreFloat16x4(C_data + ldc, accu1); + } else { + float16x4_t alpha_v = MlasBroadcastFloat16x4(alpha); + accu0 = vmul_f16(accu0, alpha_v); + accu1 = vmul_f16(accu1, alpha_v); + MlasStoreFloat16x4(C_data, accu0); + MlasStoreFloat16x4(C_data + ldc, accu1); + } + + CountN -= 4, B_data += 4 * ldb, C_data += 4; + } + + if (CountN > 0) { + const auto* a = A_data; + const auto* b = B_data; + size_t k = CountK; + float16x8_t accu0[4]; + float16x8_t accu1[4]; + size_t i = 0; + for (i = 0; i < 4; ++i) { + accu0[i] = MlasZeroFloat16x8(); + accu1[i] = MlasZeroFloat16x8(); + } + for (; k >= 8; k -= 8, a += 8, b += 8) { + float16x8_t a0 = MlasLoadFloat16x8(a); + float16x8_t a1 = MlasLoadFloat16x8(a + lda); + for (i = 0; i < CountN; ++i) { + float16x8_t bi = MlasLoadFloat16x8(b + i * ldb); + accu0[i] = vfmaq_f16(accu0[i], bi, a0); + accu1[i] = vfmaq_f16(accu1[i], bi, a1); + } + } + Transpose4x8(accu0[0], accu0[1], accu0[2], accu0[3]); + Transpose4x8(accu1[0], accu1[1], accu1[2], accu1[3]); + float16x8_t accu00 = addq_f16x4(accu0[0], accu0[1], accu0[2], accu0[3]); + float16x4_t accu_0 = vadd_f16(vget_low_f16(accu00), vget_high_f16(accu00)); + float16x8_t accu10 = addq_f16x4(accu1[0], accu1[1], accu1[2], accu1[3]); + float16x4_t accu_1 = vadd_f16(vget_low_f16(accu10), vget_high_f16(accu10)); + + if (k & 4) { + float16x4_t bs[4]; + for (i = 0; i < CountN; ++i) { + bs[i] = MlasLoadFloat16x4(b + i * ldb); + } + for (; i < 4; ++i) { + bs[i] = MlasZeroFloat16x4(); + } + Transpose4x4(bs[0], bs[1], bs[2], bs[3]); + float16x4_t a0 = MlasLoadFloat16x4(a); + float16x4_t a1 = MlasLoadFloat16x4(a + lda); + accu_0 = ma_lane_f16_accu(accu_0, bs[0], bs[1], bs[2], bs[3], a0); + accu_1 = ma_lane_f16_accu(accu_1, bs[0], bs[1], bs[2], bs[3], a1); + k -= 4, a += 4, b += 4; + } + + if (k > 0) { + float16x4_t bs[4]; + for (i = 0; i < CountN; ++i) { + bs[i] = MlasLoadPartialFloat16x4(b + i * ldb, k); + } + for (; i < 4; ++i) { + bs[i] = MlasZeroFloat16x4(); + } + Transpose4x4(bs[0], bs[1], bs[2], bs[3]); + float16x4_t a0 = 
MlasLoadPartialFloat16x4(a, k); + float16x4_t a1 = MlasLoadPartialFloat16x4(a + lda, k); + accu_0 = vfma_lane_f16(accu_0, bs[0], a0, 0); + accu_1 = vfma_lane_f16(accu_1, bs[0], a1, 0); + if (k > 1) { + accu_0 = vfma_lane_f16(accu_0, bs[1], a0, 1); + accu_1 = vfma_lane_f16(accu_1, bs[1], a1, 1); + } + if (k > 2) { + accu_0 = vfma_lane_f16(accu_0, bs[2], a0, 2); + accu_1 = vfma_lane_f16(accu_1, bs[2], a1, 2); + } + } + + if constexpr (beta_behavior == 1) { + float16x4_t c0 = MlasLoadPartialFloat16x4(C_data, CountN); + float16x4_t c1 = MlasLoadPartialFloat16x4(C_data + ldc, CountN); + float16x4_t alpha_v = MlasBroadcastFloat16x4(alpha); + accu_0 = vfma_f16(c0, accu_0, alpha_v); + accu_1 = vfma_f16(c1, accu_1, alpha_v); + MlasStorePartialFloat16x4(C_data, accu_0, CountN); + MlasStorePartialFloat16x4(C_data + ldc, accu_1, CountN); + } else if constexpr (beta_behavior == 2) { + float16x4_t c0 = MlasLoadPartialFloat16x4(C_data, CountN); + float16x4_t c1 = MlasLoadPartialFloat16x4(C_data + ldc, CountN); + float16x4_t alpha_v = MlasBroadcastFloat16x4(alpha); + float16x4_t beta_v = MlasBroadcastFloat16x4(beta); + accu_0 = vfma_f16(vmul_f16(c0, beta_v), accu_0, alpha_v); + accu_1 = vfma_f16(vmul_f16(c1, beta_v), accu_1, alpha_v); + MlasStorePartialFloat16x4(C_data, accu_0, CountN); + MlasStorePartialFloat16x4(C_data + ldc, accu_1, CountN); + } else { + float16x4_t alpha_v = MlasBroadcastFloat16x4(alpha); + accu_0 = vmul_f16(accu_0, alpha_v); + accu_1 = vmul_f16(accu_1, alpha_v); + MlasStorePartialFloat16x4(C_data, accu_0, CountN); + MlasStorePartialFloat16x4(C_data + ldc, accu_1, CountN); + } + } +} + +// Full K. Directly save to C. +void HGemm_TransposedB_Kernel( + const MLAS_FP16* A, + const MLAS_FP16* B, + MLAS_FP16* C, + size_t CountM, + size_t CountN, + size_t CountK, + size_t lda, + size_t ldb, + size_t ldc, + _mlas_fp16_ alpha, + _mlas_fp16_ beta +) { + if (CountM > 2) { + MLAS_THROW_EX(std::runtime_error, "HGemm_TransposedB_Kernel only support <= 2 rows"); + } + const auto* A_data = reinterpret_cast<const _mlas_fp16_*>(A); + const auto* B_data = reinterpret_cast<const _mlas_fp16_*>(B); + auto* C_data = reinterpret_cast<_mlas_fp16_*>(C); + const auto f16_0 = MLAS_FP16(0.0f); + const auto f16_1 = MLAS_FP16(1.0f); + if (CountM == 1) { + if (beta == f16_0.val) { + HGemm_TransposedB_Kernel_M1<0>(A_data, B_data, C_data, CountN, CountK, ldb, alpha, beta); + } else if (beta == f16_1.val) { + HGemm_TransposedB_Kernel_M1<1>(A_data, B_data, C_data, CountN, CountK, ldb, alpha, beta); + } else { + HGemm_TransposedB_Kernel_M1<2>(A_data, B_data, C_data, CountN, CountK, ldb, alpha, beta); + } + } else { + if (beta == f16_0.val) { + HGemm_TransposedB_Kernel_M2<0>(A_data, B_data, C_data, CountN, CountK, lda, ldb, ldc, alpha, beta); + } else if (beta == f16_1.val) { + HGemm_TransposedB_Kernel_M2<1>(A_data, B_data, C_data, CountN, CountK, lda, ldb, ldc, alpha, beta); + } else { + HGemm_TransposedB_Kernel_M2<2>(A_data, B_data, C_data, CountN, CountK, lda, ldb, ldc, alpha, beta); + } + } +} + +template <int beta_behavior> // 0: beta == 0, 1: beta == 1, 2: beta != 0 && beta != 1 +void HGemm_TransposedPackedB_Kernel_M1( + const _mlas_fp16_* A, + const _mlas_fp16_* PackedB, + _mlas_fp16_* C, + size_t CountN, + size_t CountK, + _mlas_fp16_ alpha, + _mlas_fp16_ beta +) { + for (; CountN >= 16; CountN -= 16, C += 16) { + const auto* a = A; + size_t k = CountK; + float16x8_t accu0 = MlasZeroFloat16x8(); + float16x8_t accu1 = MlasZeroFloat16x8(); + for (; k >= 8; k -= 8, a += 8, PackedB += 8 * 16) { + float16x8_t b00 = MlasLoadFloat16x8(PackedB); + float16x8_t b01 =
MlasLoadFloat16x8(PackedB + 8); + float16x8_t b10 = MlasLoadFloat16x8(PackedB + 16); + float16x8_t b11 = MlasLoadFloat16x8(PackedB + 24); + float16x8_t b20 = MlasLoadFloat16x8(PackedB + 32); + float16x8_t b21 = MlasLoadFloat16x8(PackedB + 40); + float16x8_t b30 = MlasLoadFloat16x8(PackedB + 48); + float16x8_t b31 = MlasLoadFloat16x8(PackedB + 56); + float16x8_t b40 = MlasLoadFloat16x8(PackedB + 64); + float16x8_t b41 = MlasLoadFloat16x8(PackedB + 72); + float16x8_t b50 = MlasLoadFloat16x8(PackedB + 80); + float16x8_t b51 = MlasLoadFloat16x8(PackedB + 88); + float16x8_t b60 = MlasLoadFloat16x8(PackedB + 96); + float16x8_t b61 = MlasLoadFloat16x8(PackedB + 104); + float16x8_t b70 = MlasLoadFloat16x8(PackedB + 112); + float16x8_t b71 = MlasLoadFloat16x8(PackedB + 120); + float16x8_t a0 = MlasLoadFloat16x8(a); + accu0 = maq_laneq_f16_accu(accu0, b00, b10, b20, b30, b40, b50, b60, b70, a0); + accu1 = maq_laneq_f16_accu(accu1, b01, b11, b21, b31, b41, b51, b61, b71, a0); + } + + if (k & 4) { + float16x8_t b00 = MlasLoadFloat16x8(PackedB); + float16x8_t b01 = MlasLoadFloat16x8(PackedB + 8); + float16x8_t b10 = MlasLoadFloat16x8(PackedB + 16); + float16x8_t b11 = MlasLoadFloat16x8(PackedB + 24); + float16x8_t b20 = MlasLoadFloat16x8(PackedB + 32); + float16x8_t b21 = MlasLoadFloat16x8(PackedB + 40); + float16x8_t b30 = MlasLoadFloat16x8(PackedB + 48); + float16x8_t b31 = MlasLoadFloat16x8(PackedB + 56); + float16x4_t a0 = MlasLoadFloat16x4(a); + accu0 = maq_lane_f16_accu(accu0, b00, b10, b20, b30, a0); + accu1 = maq_lane_f16_accu(accu1, b01, b11, b21, b31, a0); + k -= 4, a += 4, PackedB += 4 * 16; + } + + if (k > 0) { + float16x4_t a0 = MlasLoadPartialFloat16x4(a, k); + float16x8_t b00 = MlasLoadFloat16x8(PackedB); + float16x8_t b01 = MlasLoadFloat16x8(PackedB + 8); + accu0 = vfmaq_lane_f16(accu0, b00, a0, 0); + accu1 = vfmaq_lane_f16(accu1, b01, a0, 0); + if (k > 1) { + float16x8_t b10 = MlasLoadFloat16x8(PackedB + 16); + float16x8_t b11 = MlasLoadFloat16x8(PackedB + 24); + accu0 = vfmaq_lane_f16(accu0, b10, a0, 1); + accu1 = vfmaq_lane_f16(accu1, b11, a0, 1); + } + if (k > 2) { + float16x8_t b20 = MlasLoadFloat16x8(PackedB + 32); + float16x8_t b21 = MlasLoadFloat16x8(PackedB + 40); + accu0 = vfmaq_lane_f16(accu0, b20, a0, 2); + accu1 = vfmaq_lane_f16(accu1, b21, a0, 2); + } + + PackedB += k * 16; + } + + if constexpr (beta_behavior == 1) { + float16x8_t c0 = MlasLoadFloat16x8(C); + float16x8_t c1 = MlasLoadFloat16x8(C + 8); + float16x8_t alpha_v = MlasBroadcastFloat16x8(alpha); + accu0 = vfmaq_f16(c0, accu0, alpha_v); + accu1 = vfmaq_f16(c1, accu1, alpha_v); + MlasStoreFloat16x8(C, accu0); + MlasStoreFloat16x8(C + 8, accu1); + } else if constexpr (beta_behavior == 2) { + float16x8_t c0 = MlasLoadFloat16x8(C); + float16x8_t c1 = MlasLoadFloat16x8(C + 8); + float16x8_t alpha_v = MlasBroadcastFloat16x8(alpha); + float16x8_t beta_v = MlasBroadcastFloat16x8(beta); + accu0 = vfmaq_f16(vmulq_f16(c0, beta_v), accu0, alpha_v); + accu1 = vfmaq_f16(vmulq_f16(c1, beta_v), accu1, alpha_v); + MlasStoreFloat16x8(C, accu0); + MlasStoreFloat16x8(C + 8, accu1); + } else { + float16x8_t alpha_v = MlasBroadcastFloat16x8(alpha); + accu0 = vmulq_f16(accu0, alpha_v); + accu1 = vmulq_f16(accu1, alpha_v); + MlasStoreFloat16x8(C, accu0); + MlasStoreFloat16x8(C + 8, accu1); + } + } + + if (CountN & 8) { + const auto* a = A; + size_t k = CountK; + float16x8_t accu0 = MlasZeroFloat16x8(); + for (; k >= 8; k -= 8, a += 8, PackedB += 8 * 8) { + float16x8_t b0 = MlasLoadFloat16x8(PackedB); + float16x8_t b1 = 
MlasLoadFloat16x8(PackedB + 8); + float16x8_t b2 = MlasLoadFloat16x8(PackedB + 16); + float16x8_t b3 = MlasLoadFloat16x8(PackedB + 24); + float16x8_t b4 = MlasLoadFloat16x8(PackedB + 32); + float16x8_t b5 = MlasLoadFloat16x8(PackedB + 40); + float16x8_t b6 = MlasLoadFloat16x8(PackedB + 48); + float16x8_t b7 = MlasLoadFloat16x8(PackedB + 56); + float16x8_t a0 = MlasLoadFloat16x8(a); + accu0 = maq_laneq_f16_accu(accu0, b0, b1, b2, b3, b4, b5, b6, b7, a0); + } + + if (k & 4) { + float16x8_t b0 = MlasLoadFloat16x8(PackedB); + float16x8_t b1 = MlasLoadFloat16x8(PackedB + 8); + float16x8_t b2 = MlasLoadFloat16x8(PackedB + 16); + float16x8_t b3 = MlasLoadFloat16x8(PackedB + 24); + float16x4_t a0 = MlasLoadFloat16x4(a); + accu0 = maq_lane_f16_accu(accu0, b0, b1, b2, b3, a0); + k -= 4, a += 4, PackedB += 4 * 8; + } + + if (k > 0) { + float16x4_t a0 = MlasLoadPartialFloat16x4(a, k); + float16x8_t b0 = MlasLoadFloat16x8(PackedB); + accu0 = vfmaq_lane_f16(accu0, b0, a0, 0); + if (k > 1) { + float16x8_t b1 = MlasLoadFloat16x8(PackedB + 8); + accu0 = vfmaq_lane_f16(accu0, b1, a0, 1); + } + if (k > 2) { + float16x8_t b2 = MlasLoadFloat16x8(PackedB + 16); + accu0 = vfmaq_lane_f16(accu0, b2, a0, 2); + } + PackedB += k * 8; + } + + if constexpr (beta_behavior == 1) { + float16x8_t c0 = MlasLoadFloat16x8(C); + float16x8_t alpha_v = MlasBroadcastFloat16x8(alpha); + accu0 = vfmaq_f16(c0, accu0, alpha_v); + MlasStoreFloat16x8(C, accu0); + } else if constexpr (beta_behavior == 2) { + float16x8_t c0 = MlasLoadFloat16x8(C); + float16x8_t alpha_v = MlasBroadcastFloat16x8(alpha); + float16x8_t beta_v = MlasBroadcastFloat16x8(beta); + accu0 = vfmaq_f16(vmulq_f16(c0, beta_v), accu0, alpha_v); + MlasStoreFloat16x8(C, accu0); + } else { + float16x8_t alpha_v = MlasBroadcastFloat16x8(alpha); + accu0 = vmulq_f16(accu0, alpha_v); + MlasStoreFloat16x8(C, accu0); + } + + CountN -= 8, C += 8; + } + + if (CountN > 0) { + const auto* a = A; + size_t k = CountK; + float16x8_t accu0 = MlasZeroFloat16x8(); + for (; k >= 8; k -= 8, a += 8, PackedB += 8 * 8) { + float16x8_t b0 = MlasLoadFloat16x8(PackedB); + float16x8_t b1 = MlasLoadFloat16x8(PackedB + 8); + float16x8_t b2 = MlasLoadFloat16x8(PackedB + 16); + float16x8_t b3 = MlasLoadFloat16x8(PackedB + 24); + float16x8_t b4 = MlasLoadFloat16x8(PackedB + 32); + float16x8_t b5 = MlasLoadFloat16x8(PackedB + 40); + float16x8_t b6 = MlasLoadFloat16x8(PackedB + 48); + float16x8_t b7 = MlasLoadFloat16x8(PackedB + 56); + float16x8_t a0 = MlasLoadFloat16x8(a); + accu0 = maq_laneq_f16_accu(accu0, b0, b1, b2, b3, b4, b5, b6, b7, a0); + } + + if (k & 4) { + float16x8_t b0 = MlasLoadFloat16x8(PackedB); + float16x8_t b1 = MlasLoadFloat16x8(PackedB + 8); + float16x8_t b2 = MlasLoadFloat16x8(PackedB + 16); + float16x8_t b3 = MlasLoadFloat16x8(PackedB + 24); + float16x4_t a0 = MlasLoadFloat16x4(a); + accu0 = maq_lane_f16_accu(accu0, b0, b1, b2, b3, a0); + k -= 4, a += 4, PackedB += 4 * 8; + } + + if (k > 0) { + float16x4_t a0 = MlasLoadPartialFloat16x4(a, k); + float16x8_t b0 = MlasLoadFloat16x8(PackedB); + accu0 = vfmaq_lane_f16(accu0, b0, a0, 0); + if (k > 1) { + float16x8_t b1 = MlasLoadFloat16x8(PackedB + 8); + accu0 = vfmaq_lane_f16(accu0, b1, a0, 1); + } + if (k > 2) { + float16x8_t b2 = MlasLoadFloat16x8(PackedB + 16); + accu0 = vfmaq_lane_f16(accu0, b2, a0, 2); + } + PackedB += k * 8; + } + + float16x4_t accu_low = vget_low_f16(accu0); + float16x4_t accu_high = vget_high_f16(accu0); + + if (CountN & 4) { + if constexpr (beta_behavior == 1) { + float16x4_t c0 = MlasLoadFloat16x4(C); + 
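+ // beta == 1: C += alpha * accu folds into a single fused multiply-add, so no separate beta multiply is needed.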
float16x4_t alpha_v = MlasBroadcastFloat16x4(alpha); + MlasStoreFloat16x4(C, vfma_f16(c0, accu_low, alpha_v)); + } else if constexpr (beta_behavior == 2) { + float16x4_t c0 = MlasLoadFloat16x4(C); + float16x4_t alpha_v = MlasBroadcastFloat16x4(alpha); + float16x4_t beta_v = MlasBroadcastFloat16x4(beta); + MlasStoreFloat16x4(C, vfma_f16(vmul_f16(c0, beta_v), accu_low, alpha_v)); + } else { + float16x4_t alpha_v = MlasBroadcastFloat16x4(alpha); + MlasStoreFloat16x4(C, vmul_f16(accu_low, alpha_v)); + } + + CountN -= 4, C += 4; + accu_low = accu_high; + } + + if (CountN) { + if constexpr (beta_behavior == 1) { + float16x4_t c0 = MlasLoadPartialFloat16x4(C, CountN); + float16x4_t alpha_v = MlasBroadcastFloat16x4(alpha); + MlasStorePartialFloat16x4(C, vfma_f16(c0, accu_low, alpha_v), CountN); + } else if constexpr (beta_behavior == 2) { + float16x4_t c0 = MlasLoadPartialFloat16x4(C, CountN); + float16x4_t alpha_v = MlasBroadcastFloat16x4(alpha); + float16x4_t beta_v = MlasBroadcastFloat16x4(beta); + MlasStorePartialFloat16x4(C, vfma_f16(vmul_f16(c0, beta_v), accu_low, alpha_v), CountN); + } else { + float16x4_t alpha_v = MlasBroadcastFloat16x4(alpha); + MlasStorePartialFloat16x4(C, vmul_f16(accu_low, alpha_v), CountN); + } + } + } +} + +template <int beta_behavior> // 0: beta == 0, 1: beta == 1, 2: beta != 0 && beta != 1 +void HGemm_TransposedPackedB_Kernel_M2( + const _mlas_fp16_* A, + const _mlas_fp16_* PackedB, + _mlas_fp16_* C, + size_t CountN, + size_t CountK, + size_t lda, + size_t ldc, + _mlas_fp16_ alpha, + _mlas_fp16_ beta +) { + for (; CountN >= 16; CountN -= 16, C += 16) { + const auto* a = A; + size_t k = CountK; + float16x8_t accu00 = MlasZeroFloat16x8(); + float16x8_t accu01 = MlasZeroFloat16x8(); + float16x8_t accu10 = MlasZeroFloat16x8(); + float16x8_t accu11 = MlasZeroFloat16x8(); + for (; k >= 8; k -= 8, a += 8, PackedB += 8 * 16) { + float16x8_t b00 = MlasLoadFloat16x8(PackedB); + float16x8_t b01 = MlasLoadFloat16x8(PackedB + 8); + float16x8_t b10 = MlasLoadFloat16x8(PackedB + 16); + float16x8_t b11 = MlasLoadFloat16x8(PackedB + 24); + float16x8_t b20 = MlasLoadFloat16x8(PackedB + 32); + float16x8_t b21 = MlasLoadFloat16x8(PackedB + 40); + float16x8_t b30 = MlasLoadFloat16x8(PackedB + 48); + float16x8_t b31 = MlasLoadFloat16x8(PackedB + 56); + float16x8_t b40 = MlasLoadFloat16x8(PackedB + 64); + float16x8_t b41 = MlasLoadFloat16x8(PackedB + 72); + float16x8_t b50 = MlasLoadFloat16x8(PackedB + 80); + float16x8_t b51 = MlasLoadFloat16x8(PackedB + 88); + float16x8_t b60 = MlasLoadFloat16x8(PackedB + 96); + float16x8_t b61 = MlasLoadFloat16x8(PackedB + 104); + float16x8_t b70 = MlasLoadFloat16x8(PackedB + 112); + float16x8_t b71 = MlasLoadFloat16x8(PackedB + 120); + float16x8_t a0 = MlasLoadFloat16x8(a); + float16x8_t a1 = MlasLoadFloat16x8(a + lda); + accu00 = maq_laneq_f16_accu(accu00, b00, b10, b20, b30, b40, b50, b60, b70, a0); + accu01 = maq_laneq_f16_accu(accu01, b01, b11, b21, b31, b41, b51, b61, b71, a0); + accu10 = maq_laneq_f16_accu(accu10, b00, b10, b20, b30, b40, b50, b60, b70, a1); + accu11 = maq_laneq_f16_accu(accu11, b01, b11, b21, b31, b41, b51, b61, b71, a1); + } + + if (k & 4) { + float16x8_t b00 = MlasLoadFloat16x8(PackedB); + float16x8_t b01 = MlasLoadFloat16x8(PackedB + 8); + float16x8_t b10 = MlasLoadFloat16x8(PackedB + 16); + float16x8_t b11 = MlasLoadFloat16x8(PackedB + 24); + float16x8_t b20 = MlasLoadFloat16x8(PackedB + 32); + float16x8_t b21 = MlasLoadFloat16x8(PackedB + 40); + float16x8_t b30 = MlasLoadFloat16x8(PackedB + 48); + float16x8_t b31 = MlasLoadFloat16x8(PackedB
+ 56); + float16x4_t a0 = MlasLoadFloat16x4(a); + float16x4_t a1 = MlasLoadFloat16x4(a + lda); + accu00 = maq_lane_f16_accu(accu00, b00, b10, b20, b30, a0); + accu01 = maq_lane_f16_accu(accu01, b01, b11, b21, b31, a0); + accu10 = maq_lane_f16_accu(accu10, b00, b10, b20, b30, a1); + accu11 = maq_lane_f16_accu(accu11, b01, b11, b21, b31, a1); + k -= 4, a += 4, PackedB += 4 * 16; + } + + if (k > 0) { + float16x4_t a0 = MlasLoadPartialFloat16x4(a, k); + float16x4_t a1 = MlasLoadPartialFloat16x4(a + lda, k); + float16x8_t b00 = MlasLoadFloat16x8(PackedB); + float16x8_t b01 = MlasLoadFloat16x8(PackedB + 8); + accu00 = vfmaq_lane_f16(accu00, b00, a0, 0); + accu01 = vfmaq_lane_f16(accu01, b01, a0, 0); + accu10 = vfmaq_lane_f16(accu10, b00, a1, 0); + accu11 = vfmaq_lane_f16(accu11, b01, a1, 0); + if (k > 1) { + float16x8_t b10 = MlasLoadFloat16x8(PackedB + 16); + float16x8_t b11 = MlasLoadFloat16x8(PackedB + 24); + accu00 = vfmaq_lane_f16(accu00, b10, a0, 1); + accu01 = vfmaq_lane_f16(accu01, b11, a0, 1); + accu10 = vfmaq_lane_f16(accu10, b10, a1, 1); + accu11 = vfmaq_lane_f16(accu11, b11, a1, 1); + } + if (k > 2) { + float16x8_t b20 = MlasLoadFloat16x8(PackedB + 32); + float16x8_t b21 = MlasLoadFloat16x8(PackedB + 40); + accu00 = vfmaq_lane_f16(accu00, b20, a0, 2); + accu01 = vfmaq_lane_f16(accu01, b21, a0, 2); + accu10 = vfmaq_lane_f16(accu10, b20, a1, 2); + accu11 = vfmaq_lane_f16(accu11, b21, a1, 2); + } + PackedB += k * 16; + } + + if constexpr (beta_behavior == 1) { + float16x8_t c00 = MlasLoadFloat16x8(C); + float16x8_t c01 = MlasLoadFloat16x8(C + 8); + float16x8_t c10 = MlasLoadFloat16x8(C + ldc); + float16x8_t c11 = MlasLoadFloat16x8(C + ldc + 8); + float16x8_t alpha_v = MlasBroadcastFloat16x8(alpha); + accu00 = vfmaq_f16(c00, accu00, alpha_v); + accu01 = vfmaq_f16(c01, accu01, alpha_v); + accu10 = vfmaq_f16(c10, accu10, alpha_v); + accu11 = vfmaq_f16(c11, accu11, alpha_v); + MlasStoreFloat16x8(C, accu00); + MlasStoreFloat16x8(C + 8, accu01); + MlasStoreFloat16x8(C + ldc, accu10); + MlasStoreFloat16x8(C + ldc + 8, accu11); + } else if constexpr (beta_behavior == 2) { + float16x8_t c00 = MlasLoadFloat16x8(C); + float16x8_t c01 = MlasLoadFloat16x8(C + 8); + float16x8_t c10 = MlasLoadFloat16x8(C + ldc); + float16x8_t c11 = MlasLoadFloat16x8(C + ldc + 8); + float16x8_t alpha_v = MlasBroadcastFloat16x8(alpha); + float16x8_t beta_v = MlasBroadcastFloat16x8(beta); + accu00 = vfmaq_f16(vmulq_f16(c00, beta_v), accu00, alpha_v); + accu01 = vfmaq_f16(vmulq_f16(c01, beta_v), accu01, alpha_v); + accu10 = vfmaq_f16(vmulq_f16(c10, beta_v), accu10, alpha_v); + accu11 = vfmaq_f16(vmulq_f16(c11, beta_v), accu11, alpha_v); + MlasStoreFloat16x8(C, accu00); + MlasStoreFloat16x8(C + 8, accu01); + MlasStoreFloat16x8(C + ldc, accu10); + MlasStoreFloat16x8(C + ldc + 8, accu11); + } else { + float16x8_t alpha_v = MlasBroadcastFloat16x8(alpha); + accu00 = vmulq_f16(accu00, alpha_v); + accu01 = vmulq_f16(accu01, alpha_v); + accu10 = vmulq_f16(accu10, alpha_v); + accu11 = vmulq_f16(accu11, alpha_v); + MlasStoreFloat16x8(C, accu00); + MlasStoreFloat16x8(C + 8, accu01); + MlasStoreFloat16x8(C + ldc, accu10); + MlasStoreFloat16x8(C + ldc + 8, accu11); + } + } + + if (CountN & 8) { + const auto* a = A; + size_t k = CountK; + float16x8_t accu00 = MlasZeroFloat16x8(); + float16x8_t accu10 = MlasZeroFloat16x8(); + for (; k >= 8; k -= 8, a += 8, PackedB += 8 * 8) { + float16x8_t b0 = MlasLoadFloat16x8(PackedB); + float16x8_t b1 = MlasLoadFloat16x8(PackedB + 8); + float16x8_t b2 = MlasLoadFloat16x8(PackedB + 16); + float16x8_t 
b3 = MlasLoadFloat16x8(PackedB + 24); + float16x8_t b4 = MlasLoadFloat16x8(PackedB + 32); + float16x8_t b5 = MlasLoadFloat16x8(PackedB + 40); + float16x8_t b6 = MlasLoadFloat16x8(PackedB + 48); + float16x8_t b7 = MlasLoadFloat16x8(PackedB + 56); + float16x8_t a0 = MlasLoadFloat16x8(a); + float16x8_t a1 = MlasLoadFloat16x8(a + lda); + accu00 = maq_laneq_f16_accu(accu00, b0, b1, b2, b3, b4, b5, b6, b7, a0); + accu10 = maq_laneq_f16_accu(accu10, b0, b1, b2, b3, b4, b5, b6, b7, a1); + } + + if (k & 4) { + float16x8_t b0 = MlasLoadFloat16x8(PackedB); + float16x8_t b1 = MlasLoadFloat16x8(PackedB + 8); + float16x8_t b2 = MlasLoadFloat16x8(PackedB + 16); + float16x8_t b3 = MlasLoadFloat16x8(PackedB + 24); + float16x4_t a0 = MlasLoadFloat16x4(a); + float16x4_t a1 = MlasLoadFloat16x4(a + lda); + accu00 = maq_lane_f16_accu(accu00, b0, b1, b2, b3, a0); + accu10 = maq_lane_f16_accu(accu10, b0, b1, b2, b3, a1); + k -= 4, a += 4, PackedB += 4 * 8; + } + + if (k > 0) { + float16x4_t a0 = MlasLoadPartialFloat16x4(a, k); + float16x4_t a1 = MlasLoadPartialFloat16x4(a + lda, k); + float16x8_t b0 = MlasLoadFloat16x8(PackedB); + accu00 = vfmaq_lane_f16(accu00, b0, a0, 0); + accu10 = vfmaq_lane_f16(accu10, b0, a1, 0); + if (k > 1) { + float16x8_t b1 = MlasLoadFloat16x8(PackedB + 8); + accu00 = vfmaq_lane_f16(accu00, b1, a0, 1); + accu10 = vfmaq_lane_f16(accu10, b1, a1, 1); + } + if (k > 2) { + float16x8_t b2 = MlasLoadFloat16x8(PackedB + 16); + accu00 = vfmaq_lane_f16(accu00, b2, a0, 2); + accu10 = vfmaq_lane_f16(accu10, b2, a1, 2); + } + PackedB += k * 8; + } + + if constexpr (beta_behavior == 1) { + float16x8_t c0 = MlasLoadFloat16x8(C); + float16x8_t c1 = MlasLoadFloat16x8(C + ldc); + float16x8_t alpha_v = MlasBroadcastFloat16x8(alpha); + accu00 = vfmaq_f16(c0, accu00, alpha_v); + accu10 = vfmaq_f16(c1, accu10, alpha_v); + MlasStoreFloat16x8(C, accu00); + MlasStoreFloat16x8(C + ldc, accu10); + } else if constexpr (beta_behavior == 2) { + float16x8_t c0 = MlasLoadFloat16x8(C); + float16x8_t c1 = MlasLoadFloat16x8(C + ldc); + float16x8_t alpha_v = MlasBroadcastFloat16x8(alpha); + float16x8_t beta_v = MlasBroadcastFloat16x8(beta); + accu00 = vfmaq_f16(vmulq_f16(c0, beta_v), accu00, alpha_v); + accu10 = vfmaq_f16(vmulq_f16(c1, beta_v), accu10, alpha_v); + MlasStoreFloat16x8(C, accu00); + MlasStoreFloat16x8(C + ldc, accu10); + } else { + float16x8_t alpha_v = MlasBroadcastFloat16x8(alpha); + accu00 = vmulq_f16(accu00, alpha_v); + accu10 = vmulq_f16(accu10, alpha_v); + MlasStoreFloat16x8(C, accu00); + MlasStoreFloat16x8(C + ldc, accu10); + } + + CountN -= 8, C += 8; + } + + if (CountN > 0) { + const auto* a = A; + size_t k = CountK; + float16x8_t accu0 = MlasZeroFloat16x8(); + float16x8_t accu1 = MlasZeroFloat16x8(); + for (; k >= 8; k -= 8, a += 8, PackedB += 8 * 8) { + float16x8_t b0 = MlasLoadFloat16x8(PackedB); + float16x8_t b1 = MlasLoadFloat16x8(PackedB + 8); + float16x8_t b2 = MlasLoadFloat16x8(PackedB + 16); + float16x8_t b3 = MlasLoadFloat16x8(PackedB + 24); + float16x8_t b4 = MlasLoadFloat16x8(PackedB + 32); + float16x8_t b5 = MlasLoadFloat16x8(PackedB + 40); + float16x8_t b6 = MlasLoadFloat16x8(PackedB + 48); + float16x8_t b7 = MlasLoadFloat16x8(PackedB + 56); + float16x8_t a0 = MlasLoadFloat16x8(a); + float16x8_t a1 = MlasLoadFloat16x8(a + lda); + accu0 = maq_laneq_f16_accu(accu0, b0, b1, b2, b3, b4, b5, b6, b7, a0); + accu1 = maq_laneq_f16_accu(accu1, b0, b1, b2, b3, b4, b5, b6, b7, a1); + } + + if (k & 4) { + float16x8_t b0 = MlasLoadFloat16x8(PackedB); + float16x8_t b1 = MlasLoadFloat16x8(PackedB + 
8); + float16x8_t b2 = MlasLoadFloat16x8(PackedB + 16); + float16x8_t b3 = MlasLoadFloat16x8(PackedB + 24); + float16x4_t a0 = MlasLoadFloat16x4(a); + float16x4_t a1 = MlasLoadFloat16x4(a + lda); + accu0 = maq_lane_f16_accu(accu0, b0, b1, b2, b3, a0); + accu1 = maq_lane_f16_accu(accu1, b0, b1, b2, b3, a1); + k -= 4, a += 4, PackedB += 4 * 8; + } + + if (k > 0) { + float16x4_t a0 = MlasLoadPartialFloat16x4(a, k); + float16x4_t a1 = MlasLoadPartialFloat16x4(a + lda, k); + float16x8_t b0 = MlasLoadFloat16x8(PackedB); + accu0 = vfmaq_lane_f16(accu0, b0, a0, 0); + accu1 = vfmaq_lane_f16(accu1, b0, a1, 0); + if (k > 1) { + float16x8_t b1 = MlasLoadFloat16x8(PackedB + 8); + accu0 = vfmaq_lane_f16(accu0, b1, a0, 1); + accu1 = vfmaq_lane_f16(accu1, b1, a1, 1); + } + if (k > 2) { + float16x8_t b2 = MlasLoadFloat16x8(PackedB + 16); + accu0 = vfmaq_lane_f16(accu0, b2, a0, 2); + accu1 = vfmaq_lane_f16(accu1, b2, a1, 2); + } + PackedB += k * 8; + } + + float16x4_t accu0_low = vget_low_f16(accu0); + float16x4_t accu0_high = vget_high_f16(accu0); + float16x4_t accu1_low = vget_low_f16(accu1); + float16x4_t accu1_high = vget_high_f16(accu1); + + if (CountN & 4) { + if constexpr (beta_behavior == 1) { + float16x4_t c0 = MlasLoadFloat16x4(C); + float16x4_t c1 = MlasLoadFloat16x4(C + ldc); + float16x4_t alpha_v = MlasBroadcastFloat16x4(alpha); + MlasStoreFloat16x4(C, vfma_f16(c0, accu0_low, alpha_v)); + MlasStoreFloat16x4(C + ldc, vfma_f16(c1, accu1_low, alpha_v)); + } else if constexpr (beta_behavior == 2) { + float16x4_t c0 = MlasLoadFloat16x4(C); + float16x4_t c1 = MlasLoadFloat16x4(C + ldc); + float16x4_t alpha_v = MlasBroadcastFloat16x4(alpha); + float16x4_t beta_v = MlasBroadcastFloat16x4(beta); + MlasStoreFloat16x4(C, vfma_f16(vmul_f16(c0, beta_v), accu0_low, alpha_v)); + MlasStoreFloat16x4(C + ldc, vfma_f16(vmul_f16(c1, beta_v), accu1_low, alpha_v)); + } else { + float16x4_t alpha_v = MlasBroadcastFloat16x4(alpha); + MlasStoreFloat16x4(C, vmul_f16(accu0_low, alpha_v)); + MlasStoreFloat16x4(C + ldc, vmul_f16(accu1_low, alpha_v)); + } + CountN -= 4, C += 4; + accu0_low = accu0_high; + accu1_low = accu1_high; + } + + if (CountN) { + if constexpr (beta_behavior == 1) { + float16x4_t c0 = MlasLoadPartialFloat16x4(C, CountN); + float16x4_t c1 = MlasLoadPartialFloat16x4(C + ldc, CountN); + float16x4_t alpha_v = MlasBroadcastFloat16x4(alpha); + MlasStorePartialFloat16x4(C, vfma_f16(c0, accu0_low, alpha_v), CountN); + MlasStorePartialFloat16x4(C + ldc, vfma_f16(c1, accu1_low, alpha_v), CountN); + } else if constexpr (beta_behavior == 2) { + float16x4_t c0 = MlasLoadPartialFloat16x4(C, CountN); + float16x4_t c1 = MlasLoadPartialFloat16x4(C + ldc, CountN); + float16x4_t alpha_v = MlasBroadcastFloat16x4(alpha); + float16x4_t beta_v = MlasBroadcastFloat16x4(beta); + MlasStorePartialFloat16x4(C, vfma_f16(vmul_f16(c0, beta_v), accu0_low, alpha_v), CountN); + MlasStorePartialFloat16x4(C + ldc, vfma_f16(vmul_f16(c1, beta_v), accu1_low, alpha_v), CountN); + } else { + float16x4_t alpha_v = MlasBroadcastFloat16x4(alpha); + MlasStorePartialFloat16x4(C, vmul_f16(accu0_low, alpha_v), CountN); + MlasStorePartialFloat16x4(C + ldc, vmul_f16(accu1_low, alpha_v), CountN); + } + } + } +} + +void HGemm_TransposedPackedB_Kernel( + const MLAS_FP16* A, + const MLAS_FP16* PackedB, + MLAS_FP16* C, + size_t CountM, + size_t CountN, + size_t CountK, + size_t lda, + size_t ldc, + _mlas_fp16_ alpha, + _mlas_fp16_ beta +) { + if (CountM > 2) { + MLAS_THROW_EX(std::runtime_error, "HGemm_TransposedPackedB_Kernel only support <= 2 rows"); + } 
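+ // The dispatch below specializes the kernel on beta: 0 overwrites C, 1 accumulates into C, any other value scales C by beta first.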
+ + const auto* A_data = reinterpret_cast<const _mlas_fp16_*>(A); + const auto* PackedB_data = reinterpret_cast<const _mlas_fp16_*>(PackedB); + auto* C_data = reinterpret_cast<_mlas_fp16_*>(C); + const auto f16_0 = MLAS_FP16(0.0f); + const auto f16_1 = MLAS_FP16(1.0f); + if (CountM == 1) { + if (beta == f16_0.val) { + HGemm_TransposedPackedB_Kernel_M1<0>(A_data, PackedB_data, C_data, CountN, CountK, alpha, beta); + } else if (beta == f16_1.val) { + HGemm_TransposedPackedB_Kernel_M1<1>(A_data, PackedB_data, C_data, CountN, CountK, alpha, beta); + } else { + HGemm_TransposedPackedB_Kernel_M1<2>(A_data, PackedB_data, C_data, CountN, CountK, alpha, beta); + } + } else { + if (beta == f16_0.val) { + HGemm_TransposedPackedB_Kernel_M2<0>(A_data, PackedB_data, C_data, CountN, CountK, lda, ldc, alpha, beta); + } else if (beta == f16_1.val) { + HGemm_TransposedPackedB_Kernel_M2<1>(A_data, PackedB_data, C_data, CountN, CountK, lda, ldc, alpha, beta); + } else { + HGemm_TransposedPackedB_Kernel_M2<2>(A_data, PackedB_data, C_data, CountN, CountK, lda, ldc, alpha, beta); + } + } +} + +} // namespace hgemm_neon diff --git a/onnxruntime/core/mlas/lib/hgemm_kernel_neon.cpp b/onnxruntime/core/mlas/lib/hgemm_kernel_neon.cpp new file mode 100644 index 0000000000000..5b131a8e41f21 --- /dev/null +++ b/onnxruntime/core/mlas/lib/hgemm_kernel_neon.cpp @@ -0,0 +1,28 @@ +/*++ + +Copyright (c) Microsoft Corporation. All rights reserved. + +Licensed under the MIT License. + +Module Name: + + hgemm_kernel_neon.cpp + +Abstract: + + This module implements half precision GEMM kernel for neon. + +--*/ + +#include "mlasi.h" +#include "halfgemm.h" + +const MLAS_HGEMM_DISPATCH MlasHGemmDispatchNeon = [](){ + MLAS_HGEMM_DISPATCH d; +#if defined(MLAS_F16VEC_INTRINSICS_SUPPORTED) && defined(MLAS_TARGET_ARM64) + d.HPackBKernel_TransposedB = hgemm_neon::HPackB_TransposedB_Kernel; + d.HGemmKernel_TransposedB = hgemm_neon::HGemm_TransposedB_Kernel; + d.HGemmKernel_TransposedPackedB = hgemm_neon::HGemm_TransposedPackedB_Kernel; +#endif + return d; +}(); diff --git a/onnxruntime/core/mlas/lib/hqnbitgemm_kernel_neon_fp16.cpp b/onnxruntime/core/mlas/lib/hqnbitgemm_kernel_neon_fp16.cpp index 69e37d2b916d1..5b1f9d7d4a2dc 100644 --- a/onnxruntime/core/mlas/lib/hqnbitgemm_kernel_neon_fp16.cpp +++ b/onnxruntime/core/mlas/lib/hqnbitgemm_kernel_neon_fp16.cpp @@ -93,39 +93,6 @@ Transpose8x8(uint8x8_t& v0, uint8x8_t& v1, uint8x8_t& v2, uint8x8_t& v3, v7 = vreinterpret_u8_u32(c3.val[1]); } -MLAS_FORCEINLINE void -Transpose4x8(float16x8_t& v0, float16x8_t& v1, float16x8_t& v2, float16x8_t& v3) -{ - // |v00|v01|v02|v03|v04|v05|v06|v07| - // |v10|v11|v12|v13|v14|v15|v16|v17| - // |v20|v21|v22|v23|v24|v25|v26|v27| - // |v30|v31|v32|v33|v34|v35|v36|v37| - // => - // |v00|v10|v20|v30|v04|v14|v24|v34| - // |v01|v11|v21|v31|v05|v15|v25|v35| - // |v02|v12|v22|v32|v06|v16|v26|v36| - // |v03|v13|v23|v33|v07|v17|v27|v37| - float16x8x2_t t01 = vtrnq_f16(v0, v1); - float16x8x2_t t23 = vtrnq_f16(v2, v3); - - v0 = vreinterpretq_f16_f32(vtrn1q_f32(vreinterpretq_f32_f16(t01.val[0]), vreinterpretq_f32_f16(t23.val[0]))); - v1 = vreinterpretq_f16_f32(vtrn1q_f32(vreinterpretq_f32_f16(t01.val[1]), vreinterpretq_f32_f16(t23.val[1]))); - v2 = vreinterpretq_f16_f32(vtrn2q_f32(vreinterpretq_f32_f16(t01.val[0]), vreinterpretq_f32_f16(t23.val[0]))); - v3 = vreinterpretq_f16_f32(vtrn2q_f32(vreinterpretq_f32_f16(t01.val[1]), vreinterpretq_f32_f16(t23.val[1]))); -} - -MLAS_FORCEINLINE void -Transpose4x4(float16x4_t& v0, float16x4_t& v1, float16x4_t& v2, float16x4_t& v3) -{ - float16x4x2_t t01 = vtrn_f16(v0, v1); -
float16x4x2_t t23 = vtrn_f16(v2, v3); - - v0 = vreinterpret_f16_f32(vtrn1_f32(vreinterpret_f32_f16(t01.val[0]), vreinterpret_f32_f16(t23.val[0]))); - v1 = vreinterpret_f16_f32(vtrn1_f32(vreinterpret_f32_f16(t01.val[1]), vreinterpret_f32_f16(t23.val[1]))); - v2 = vreinterpret_f16_f32(vtrn2_f32(vreinterpret_f32_f16(t01.val[0]), vreinterpret_f32_f16(t23.val[0]))); - v3 = vreinterpret_f16_f32(vtrn2_f32(vreinterpret_f32_f16(t01.val[1]), vreinterpret_f32_f16(t23.val[1]))); -} - void HQ4BitGemmPackQuantBData_CompFp16( size_t N, diff --git a/onnxruntime/core/mlas/lib/mlasi.h b/onnxruntime/core/mlas/lib/mlasi.h index 100d7d47751aa..56fad6bb3412a 100644 --- a/onnxruntime/core/mlas/lib/mlasi.h +++ b/onnxruntime/core/mlas/lib/mlasi.h @@ -301,6 +301,8 @@ static_assert(sizeof(MLAS_FP16) == FP16_SIZE); // Define the default strides to step through slices of the input matrices. // +#define MLAS_HGEMM_STRIDEN 32 +#define MLAS_HGEMM_STRIDEK 512 #define MLAS_SGEMM_STRIDEN 128 #define MLAS_SGEMM_STRIDEK 128 #define MLAS_SGEMM_PACKED_STRIDEN 128 @@ -317,6 +319,7 @@ static_assert(sizeof(MLAS_FP16) == FP16_SIZE); // the effort at this time. // +#define MLAS_HGEMM_STRIDEN_THREAD_ALIGN 16 #define MLAS_SGEMM_STRIDEN_THREAD_ALIGN 16 #define MLAS_DGEMM_STRIDEN_THREAD_ALIGN 8 #define MLAS_QGEMM_STRIDEN_THREAD_ALIGN 16 @@ -944,6 +947,7 @@ extern "C" { #define MLAS_SGEMM_THREAD_COMPLEXITY (size_t(64) * size_t(1024)) #define MLAS_DGEMM_THREAD_COMPLEXITY (size_t(64) * size_t(1024)) #define MLAS_QGEMM_THREAD_COMPLEXITY 65536 +#define MLAS_HGEMM_THREAD_COMPLEXITY 65536 #if defined(__aarch64__) && defined(__linux__) #define MLAS_SBGEMM_THREAD_COMPLEXITY (size_t(64) * size_t(1024)) @@ -1055,6 +1059,12 @@ extern const MLAS_QNBIT_GEMM_DISPATCH MlasSQNBitGemmDispatchAvx512vnni; struct MLAS_ROPE_DISPATCH; extern const MLAS_ROPE_DISPATCH MlasRopeDispatchNeon; +// +// half gemm dispatch structure +// +struct MLAS_HGEMM_DISPATCH; +extern const MLAS_HGEMM_DISPATCH MlasHGemmDispatchNeon; + // // Quantized depthwise convolution kernels. @@ -1217,6 +1227,7 @@ struct MLAS_PLATFORM { MLAS_CAST_F32_TO_F16_KERNEL* CastF32ToF16Kernel; const MLAS_ROPE_DISPATCH* RopeDispatch{nullptr}; + const MLAS_HGEMM_DISPATCH* HGemmDispatch{nullptr}; }; inline diff --git a/onnxruntime/core/mlas/lib/platform.cpp b/onnxruntime/core/mlas/lib/platform.cpp index ec572a4150292..026a954bbc6c2 100644 --- a/onnxruntime/core/mlas/lib/platform.cpp +++ b/onnxruntime/core/mlas/lib/platform.cpp @@ -544,6 +544,7 @@ Return Value: this->ConvSymS8S8Dispatch = &MlasConvSymS8DispatchNeon; this->QNBitGemmDispatch = &MlasSQNBitGemmDispatchNeon; this->RopeDispatch = &MlasRopeDispatchNeon; + this->HGemmDispatch = &MlasHGemmDispatchNeon; // // Check if the processor supports ASIMD dot product instructions. 
diff --git a/onnxruntime/core/optimizer/identical_children_consolidation.cc b/onnxruntime/core/optimizer/identical_children_consolidation.cc index 350da9605a13d..bbc8073268f08 100644 --- a/onnxruntime/core/optimizer/identical_children_consolidation.cc +++ b/onnxruntime/core/optimizer/identical_children_consolidation.cc @@ -45,7 +45,7 @@ std::vector<std::vector<NodeIndex>> IdenticalChildrenConsolidation::DivideIdenti const Graph& graph, Node* node, const string_view& op) { - unordered_map<string_view, std::vector<NodeIndex>> identical_children_map; + unordered_map<std::string, std::vector<NodeIndex>> identical_children_map; for (auto i = node->OutputEdgesBegin(); i != node->OutputEdgesEnd(); ++i) { if (i->GetNode().OpType() == op) { identical_children_map[IdentityBuilder(graph, i->GetNode())].push_back(i->GetNode().Index()); @@ -125,4 +125,4 @@ std::string IdenticalChildrenConsolidation::IdentityBuilder(const Graph& graph, return identity.str(); } -} // namespace onnxruntime \ No newline at end of file +} // namespace onnxruntime diff --git a/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc b/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc index 2f98711771f1b..ae89af1f256d1 100644 --- a/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc +++ b/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc @@ -77,7 +77,8 @@ void DropQDQNodesRules(SelectorActionRegistry& qdq_selector_action_registry) { true, cpu_ep); qdq_selector_action_registry.RegisterSelectorAndAction(drop_action_no_int16_name, - {{"Resize", {}}}, + {{"DepthToSpace", {}}, + {"Resize", {}}}, std::move(selector_no_16bit), std::move(drop_action_no_int16)); @@ -91,7 +92,7 @@ void DropQDQNodesRules(SelectorActionRegistry& qdq_selector_action_registry) { std::move(drop_action_no_int16_and_positive_scale)); std::unique_ptr<NodeGroupSelector> selector = std::make_unique<QDQ::DropQDQNodeGroupSelector>(true, false, true, providers); - // DepthToSpace and SpaceToDepth not included because there are no integer implementations. + // SpaceToDepth not included because there are no integer implementations.
// https://github.com/microsoft/onnxruntime/issues/21287 qdq_selector_action_registry.RegisterSelectorAndAction(drop_action_name, {{"Expand", {}}, diff --git a/onnxruntime/core/optimizer/selectors_actions/selector_action_transformer.cc b/onnxruntime/core/optimizer/selectors_actions/selector_action_transformer.cc index b68cbaf85bcff..b1d6c51f693fd 100644 --- a/onnxruntime/core/optimizer/selectors_actions/selector_action_transformer.cc +++ b/onnxruntime/core/optimizer/selectors_actions/selector_action_transformer.cc @@ -147,7 +147,7 @@ static Status MatchAndProcess( RuntimeOptimizationRecord::ProducedOpIdVector produced_op_ids{}; produced_op_ids.reserve(action_saved_state.produced_node_op_schemas.size()); - for (const auto op_schema : action_saved_state.produced_node_op_schemas) { + for (const auto& op_schema : action_saved_state.produced_node_op_schemas) { produced_op_ids.push_back(utils::MakeOpId(*op_schema)); if (save_context->record_produced_node_op_schema) { status = save_context->record_produced_node_op_schema(*op_schema); diff --git a/onnxruntime/core/platform/windows/logging/etw_sink.cc b/onnxruntime/core/platform/windows/logging/etw_sink.cc index 950ac247a2046..489cd19b11302 100644 --- a/onnxruntime/core/platform/windows/logging/etw_sink.cc +++ b/onnxruntime/core/platform/windows/logging/etw_sink.cc @@ -64,6 +64,10 @@ EtwRegistrationManager& EtwRegistrationManager::Instance() { return instance; } +bool EtwRegistrationManager::SupportsETW() { + return true; +} + bool EtwRegistrationManager::IsEnabled() const { std::lock_guard lock(provider_change_mutex_); return is_enabled_; @@ -248,5 +252,19 @@ void EtwSink::SendImpl(const Timestamp& timestamp, const std::string& logger_id, } } // namespace logging } // namespace onnxruntime +#else +// ETW is not supported on this platform but should still define a dummy EtwRegistrationManager +// so that it can be used in the EP provider bridge. +namespace onnxruntime { +namespace logging { +EtwRegistrationManager& EtwRegistrationManager::Instance() { + static EtwRegistrationManager instance; + return instance; +} +bool EtwRegistrationManager::SupportsETW() { + return false; +} +} // namespace logging +} // namespace onnxruntime #endif // ETW_TRACE_LOGGING_SUPPORTED diff --git a/onnxruntime/core/platform/windows/logging/etw_sink.h b/onnxruntime/core/platform/windows/logging/etw_sink.h index 2a798a28f13de..62b762886ca82 100644 --- a/onnxruntime/core/platform/windows/logging/etw_sink.h +++ b/onnxruntime/core/platform/windows/logging/etw_sink.h @@ -60,6 +60,9 @@ class EtwRegistrationManager { // Singleton instance access static EtwRegistrationManager& Instance(); + // Returns true if ETW is supported at all. + static bool SupportsETW(); + // Check if ETW logging is enabled bool IsEnabled() const; @@ -110,5 +113,33 @@ class EtwRegistrationManager { } // namespace logging } // namespace onnxruntime +#else +// ETW is not supported on this platform but should still define a dummy EtwRegistrationManager +// so that it can be used in the EP provider bridge. 
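[Editor's note] A sketch of how the dummy manager introduced above keeps call sites in the EP provider bridge portable: the same code compiles on every platform, and on non-ETW builds SupportsETW() reports false so the registration path is simply never taken. MaybeRegisterEtwCallback is an illustrative helper, not an existing ORT function:

    #include "core/platform/windows/logging/etw_sink.h"

    void MaybeRegisterEtwCallback(
        const onnxruntime::logging::EtwRegistrationManager::EtwInternalCallback& cb) {
      using onnxruntime::logging::EtwRegistrationManager;
      if (!EtwRegistrationManager::SupportsETW()) {
        return;  // stub build: RegisterInternalCallback would be a no-op anyway
      }
      auto& mgr = EtwRegistrationManager::Instance();
      if (mgr.IsEnabled()) {
        mgr.RegisterInternalCallback(cb);
      }
    }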
+#include "core/common/logging/severity.h" +namespace onnxruntime { +namespace logging { +class EtwRegistrationManager { + public: + using EtwInternalCallback = std::function; + + static EtwRegistrationManager& Instance(); + static bool SupportsETW(); + bool IsEnabled() const { return false; } + UCHAR Level() const { return 0; } + Severity MapLevelToSeverity() { return Severity::kFATAL; } + uint64_t Keyword() const { return 0; } + HRESULT Status() const { return 0; } + void RegisterInternalCallback(const EtwInternalCallback& callback) {} + void UnregisterInternalCallback(const EtwInternalCallback& callback) {} + + private: + EtwRegistrationManager() = default; + ~EtwRegistrationManager() = default; +}; +} // namespace logging +} // namespace onnxruntime #endif // ETW_TRACE_LOGGING_SUPPORTED diff --git a/onnxruntime/core/providers/cpu/activation/activations.cc b/onnxruntime/core/providers/cpu/activation/activations.cc index 049fee4b95308..71cba37b52446 100644 --- a/onnxruntime/core/providers/cpu/activation/activations.cc +++ b/onnxruntime/core/providers/cpu/activation/activations.cc @@ -33,8 +33,10 @@ namespace onnxruntime { op, since_version, type, \ KernelDefBuilder().MayInplace(0, 0).TypeConstraint("T", DataTypeImpl::GetTensorType()), op); -REGISTER_UNARY_ELEMENTWISE_KERNEL(Elu, 6); -REGISTER_UNARY_ELEMENTWISE_KERNEL(HardSigmoid, 6); +REGISTER_VERSIONED_UNARY_ELEMENTWISE_KERNEL(Elu, 6, 21); +REGISTER_UNARY_ELEMENTWISE_KERNEL(Elu, 22); +REGISTER_VERSIONED_UNARY_ELEMENTWISE_KERNEL(HardSigmoid, 6, 21); +REGISTER_UNARY_ELEMENTWISE_KERNEL(HardSigmoid, 22); REGISTER_VERSIONED_UNARY_ELEMENTWISE_KERNEL(LeakyRelu, 6, 15); REGISTER_VERSIONED_UNARY_ELEMENTWISE_TYPED_KERNEL(Relu, 6, 12, float); REGISTER_VERSIONED_UNARY_ELEMENTWISE_TYPED_KERNEL(Relu, 6, 12, double); @@ -52,19 +54,23 @@ REGISTER_VERSIONED_UNARY_ELEMENTWISE_TYPED_KERNEL(LeakyRelu, 6, 15, MLFloat16); REGISTER_UNARY_ELEMENTWISE_TYPED_KERNEL(LeakyRelu, 16, MLFloat16); #endif // MLAS_F16VEC_INTRINSICS_SUPPORTED -REGISTER_UNARY_ELEMENTWISE_KERNEL(Selu, 6); +REGISTER_VERSIONED_UNARY_ELEMENTWISE_KERNEL(Selu, 6, 21); +REGISTER_UNARY_ELEMENTWISE_KERNEL(Selu, 22); REGISTER_VERSIONED_UNARY_ELEMENTWISE_TYPED_KERNEL(Sigmoid, 6, 12, float); REGISTER_VERSIONED_UNARY_ELEMENTWISE_TYPED_KERNEL(Sigmoid, 6, 12, double); REGISTER_UNARY_ELEMENTWISE_TYPED_KERNEL(Sigmoid, 13, float); REGISTER_UNARY_ELEMENTWISE_TYPED_KERNEL(Sigmoid, 13, double); -REGISTER_UNARY_ELEMENTWISE_KERNEL(Softplus, 1); -REGISTER_UNARY_ELEMENTWISE_KERNEL(Softsign, 1); +REGISTER_VERSIONED_UNARY_ELEMENTWISE_KERNEL(Softplus, 1, 21); +REGISTER_UNARY_ELEMENTWISE_KERNEL(Softplus, 22); +REGISTER_VERSIONED_UNARY_ELEMENTWISE_KERNEL(Softsign, 1, 21); +REGISTER_UNARY_ELEMENTWISE_KERNEL(Softsign, 22); REGISTER_VERSIONED_UNARY_ELEMENTWISE_TYPED_KERNEL(Tanh, 6, 12, float); REGISTER_VERSIONED_UNARY_ELEMENTWISE_TYPED_KERNEL(Tanh, 6, 12, double); REGISTER_UNARY_ELEMENTWISE_KERNEL(Celu, 12); REGISTER_UNARY_ELEMENTWISE_TYPED_KERNEL(Tanh, 13, float); REGISTER_UNARY_ELEMENTWISE_TYPED_KERNEL(Tanh, 13, double); -REGISTER_UNARY_ELEMENTWISE_KERNEL(ThresholdedRelu, 10); +REGISTER_VERSIONED_UNARY_ELEMENTWISE_KERNEL(ThresholdedRelu, 10, 21); +REGISTER_UNARY_ELEMENTWISE_KERNEL(ThresholdedRelu, 22); // Opset-16 adds BFloat16 to allowed types for the LeakyRelu operator REGISTER_UNARY_ELEMENTWISE_KERNEL(LeakyRelu, 16); diff --git a/onnxruntime/core/providers/cpu/cpu_execution_provider.cc b/onnxruntime/core/providers/cpu/cpu_execution_provider.cc index 0499a15e1df0a..d08620ba09db0 100644 --- 
a/onnxruntime/core/providers/cpu/cpu_execution_provider.cc +++ b/onnxruntime/core/providers/cpu/cpu_execution_provider.cc @@ -40,8 +40,8 @@ std::vector CPUExecutionProvider::CreatePreferredAllocators() { // Forward declarations of op kernels class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 6, 10, Clip); -class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 6, Elu); -class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 6, HardSigmoid); +class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 6, 21, Elu); +class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 6, 21, HardSigmoid); class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 6, 15, LeakyRelu); class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 6, 12, float, Relu); class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 6, 12, double, Relu); @@ -49,11 +49,11 @@ class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOn class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 6, 12, MLFloat16, Relu); class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 6, 15, MLFloat16, LeakyRelu); #endif -class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 6, Selu); +class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 6, 21, Selu); class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 6, 12, float, Sigmoid); class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 6, 12, double, Sigmoid); -class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 1, Softplus); -class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 1, Softsign); +class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 1, 21, Softplus); +class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 1, 21, Softsign); class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 6, 12, float, Tanh); class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 6, 12, double, Tanh); class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 7, 8, PRelu); @@ -84,21 +84,30 @@ class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOn class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 7, 12, double, Add); class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 7, 12, int32_t, Add); class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 7, 12, int64_t, Add); +class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 7, 12, uint32_t, Add); +class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 7, 12, uint64_t, Add); class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 7, 12, float, Sub); class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 7, 12, double, Sub); class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 7, 12, int32_t, Sub); class 
ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 7, 12, int64_t, Sub); +class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 7, 12, uint32_t, Sub); +class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 7, 12, uint64_t, Sub); class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 7, 12, float, Mul); class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 7, 12, double, Mul); class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 7, 12, int32_t, Mul); class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 7, 12, int64_t, Mul); +class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 7, 12, uint32_t, Mul); +class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 7, 12, uint64_t, Mul); class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 7, 12, float, Div); class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 7, 12, double, Div); class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 7, 12, int32_t, Div); class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 7, 12, int64_t, Div); +class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 7, 12, uint32_t, Div); +class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 7, 12, uint64_t, Div); class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 6, 12, float, Neg); class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 6, 12, double, Neg); class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 6, 12, int8_t, Neg); +class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 6, 12, int16_t, Neg); class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 6, 12, int32_t, Neg); class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 6, 12, int64_t, Neg); class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 7, 11, Pow); @@ -129,13 +138,13 @@ class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOn class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 7, 10, double, Equal); class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 6, 7, float, Mean); class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 8, 12, float, Mean); -class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 7, float, Sin); -class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 7, double, Sin); -class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 7, Cos); -class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 7, Tan); -class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 7, Asin); -class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 7, Acos); -class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 7, Atan); +class 
ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 7, 21, float, Sin); +class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 7, 21, double, Sin); +class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 7, 21, Cos); +class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 7, 21, Tan); +class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 7, 21, Asin); +class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 7, 21, Acos); +class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 7, 21, Atan); class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 7, 8, float, Gemm); class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 7, 8, double, Gemm); class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 1, 10, Hardmax); @@ -154,7 +163,7 @@ class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOn class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 1, 10, Conv); class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 1, 10, ConvTranspose); class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 1, 8, Flatten); -class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 6, InstanceNormalization); +class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 6, 21, InstanceNormalization); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 1, float, LpNormalization); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 1, double, LpNormalization); class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 1, 12, LRN); @@ -166,11 +175,11 @@ class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOn #endif class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 2, 10, LpPool); class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 2, GlobalLpPool); -class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 1, GlobalAveragePool); +class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 1, 21, GlobalAveragePool); #ifdef MLAS_F16VEC_INTRINSICS_SUPPORTED -class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 1, MLFloat16, GlobalAveragePool); +class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 1, 21, MLFloat16, GlobalAveragePool); #endif -class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 1, GlobalMaxPool); +class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 1, 21, GlobalMaxPool); class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 1, MaxRoiPool); class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 1, 10, float, ReduceL1); class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 1, 10, double, ReduceL1); @@ -286,7 +295,7 @@ class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOn class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, 12, double, Less); class 
ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, 12, int32_t, Less); class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, 12, int64_t, Less); -class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, EyeLike); +class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, 21, EyeLike); class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, 12, float, IsNaN); class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, 12, double, IsNaN); class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, 12, MLFloat16, IsNaN); @@ -316,11 +325,11 @@ class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOn class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, 10, int64_t_float_int32_t, OneHot); class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, 10, MaxUnpool); -class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, Sinh); -class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, Cosh); -class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, Asinh); -class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, Acosh); -class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, Atanh); +class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, 21, Sinh); +class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, 21, Cosh); +class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, 21, Asinh); +class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, 21, Acosh); +class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, 21, Atanh); class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, 10, Scan); class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, 10, Scatter); class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, TfIdfVectorizer); @@ -362,7 +371,7 @@ class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOn class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, 10, int32_t, Resize); class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, 10, int8_t, Resize); class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, 10, uint8_t, Resize); -class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, ThresholdedRelu); +class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, 21, ThresholdedRelu); class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, 12, uint8_t, DequantizeLinear); class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, 12, int8_t, @@ -401,9 +410,9 @@ class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOn class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 11, 12, int64_t, Equal); class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 11, 12, float, Equal); class 
ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 11, 12, double, Equal); -class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 11, float, Round); -class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 11, double, Round); -class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 11, MLFloat16, Round); +class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 11, 21, float, Round); +class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 11, 21, double, Round); +class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 11, 21, MLFloat16, Round); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 11, uint8_t, DynamicQuantizeLinear); class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 11, 12, float, ArgMax); class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 11, 12, double, ArgMax); @@ -483,19 +492,19 @@ class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDoma class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 11, 12, Split); class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 11, 12, Squeeze); class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 11, 12, Unsqueeze); -class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 11, Det); +class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 11, 21, Det); class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 11, 12, ScatterElements); class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 11, NonMaxSuppression); class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 11, 18, AveragePool); -class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 11, MaxUnpool); +class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 11, 21, MaxUnpool); class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 11, 17, LpPool); -class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 11, Conv); +class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 11, 21, Conv); #ifdef MLAS_F16VEC_INTRINSICS_SUPPORTED -class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 11, MLFloat16, Conv); +class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 11, 21, MLFloat16, Conv); class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 11, 18, MLFloat16, AveragePool); #endif -class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 11, ConvTranspose); +class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 11, 21, ConvTranspose); class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 11, 12, If); class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 11, SequenceLength); class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 11, SequenceAt); @@ -541,9 +550,9 @@ class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDoma class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, 
kOnnxDomain, 12, 12, Min); class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 12, 12, Max); -class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 12, MaxPool); +class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 12, 21, MaxPool); #ifdef MLAS_F16VEC_INTRINSICS_SUPPORTED -class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 12, MLFloat16, MaxPool); +class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 12, 21, MLFloat16, MaxPool); #endif class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 12, 12, Pow); class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 12, 12, float, ReduceMax); @@ -633,10 +642,10 @@ class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 13, 20, Flatten); class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 13, LRN); class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 13, MeanVarianceNormalization); -class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 13, float_float, Dropout); -class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 13, float_double, Dropout); -class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 13, double_float, Dropout); -class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 13, double_double, Dropout); +class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 13, 21, float_float, Dropout); +class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 13, 21, float_double, Dropout); +class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 13, 21, double_float, Dropout); +class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 13, 21, double_double, Dropout); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 13, float, ArgMax); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 13, double, ArgMax); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 13, int8_t, ArgMax); @@ -669,21 +678,30 @@ class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOn class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 13, 13, double, Add); class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 13, 13, int32_t, Add); class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 13, 13, int64_t, Add); +class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 13, 13, uint32_t, Add); +class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 13, 13, uint64_t, Add); class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 13, 13, float, Sub); class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 13, 13, double, Sub); class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 13, 13, int32_t, Sub); class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 13, 13, int64_t, Sub); 
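[Editor's note] The reason every opset range above needs its own declaration: kernel lookup picks the registration whose [since_version, end_version] window contains the model's opset import, so adding uint32_t/uint64_t entries only at opset 14 would leave models importing opsets 7 through 13 unserved. A simplified model of that selection (not ORT's actual lookup code; all names here are invented):

    #include <climits>
    #include <optional>
    #include <vector>

    struct KernelEntry {
        int since_version;
        int end_version;  // inclusive; use INT_MAX for "latest"
    };

    std::optional<KernelEntry> FindKernel(const std::vector<KernelEntry>& entries,
                                          int model_opset) {
        for (const auto& e : entries) {
            if (e.since_version <= model_opset && model_opset <= e.end_version)
                return e;
        }
        return std::nullopt;  // falls back to another EP or fails to resolve
    }

    // FindKernel({{7, 12}, {13, 13}, {14, INT_MAX}}, /*model_opset=*/21)
    // selects the {14, INT_MAX} entry.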
+class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 13, 13, uint32_t, Sub); +class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 13, 13, uint64_t, Sub); class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 13, 13, float, Mul); class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 13, 13, double, Mul); class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 13, 13, int32_t, Mul); class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 13, 13, int64_t, Mul); +class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 13, 13, uint32_t, Mul); +class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 13, 13, uint64_t, Mul); class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 13, 13, float, Div); class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 13, 13, double, Div); class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 13, 13, int32_t, Div); class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 13, 13, int64_t, Div); +class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 13, 13, uint32_t, Div); +class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 13, 13, uint64_t, Div); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 13, float, Neg); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 13, double, Neg); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 13, int8_t, Neg); +class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 13, int16_t, Neg); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 13, int32_t, Neg); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 13, int64_t, Neg); class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 13, Mod); @@ -824,29 +842,53 @@ class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 14, Trilu); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 14, float, Add); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 14, double, Add); +class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 14, int8_t, Add); +class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 14, int16_t, Add); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 14, int32_t, Add); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 14, int64_t, Add); +class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 14, uint8_t, Add); +class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 14, uint16_t, Add); +class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 14, uint32_t, Add); +class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 14, uint64_t, Add); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 14, float, Sub); class 
ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 14, double, Sub); +class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 14, int8_t, Sub); +class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 14, int16_t, Sub); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 14, int32_t, Sub); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 14, int64_t, Sub); +class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 14, uint8_t, Sub); +class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 14, uint16_t, Sub); +class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 14, uint32_t, Sub); +class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 14, uint64_t, Sub); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 14, float, Mul); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 14, double, Mul); +class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 14, int8_t, Mul); +class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 14, int16_t, Mul); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 14, int32_t, Mul); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 14, int64_t, Mul); +class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 14, uint8_t, Mul); +class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 14, uint16_t, Mul); +class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 14, uint32_t, Mul); +class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 14, uint64_t, Mul); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 14, float, Div); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 14, double, Div); +class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 14, int8_t, Div); +class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 14, int16_t, Div); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 14, int32_t, Div); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 14, int64_t, Div); +class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 14, uint8_t, Div); +class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 14, uint16_t, Div); +class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 14, uint32_t, Div); +class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 14, uint64_t, Div); class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 14, 18, Reshape); class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 14, 15, Identity); class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 14, 14, float, BatchNormalization); class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 14, 14, double, BatchNormalization); -class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 14, GRU); -class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 14, LSTM); -class 
ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 14, RNN); +class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 14, 21, GRU); +class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 14, 21, LSTM); +class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 14, 21, RNN); // Opset 15 class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 15, Pow); @@ -946,7 +988,7 @@ class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 18, double, ReduceSumSquare); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 18, int32_t, ReduceSumSquare); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 18, int64_t, ReduceSumSquare); -class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 18, LpPool); +class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 18, 21, LpPool); class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 18, Col2Im); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 18, int8_t, BitwiseAnd); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 18, int16_t, BitwiseAnd); @@ -992,9 +1034,9 @@ class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 18, Op // Opset 19 class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 19, 20, Size); -class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 19, AveragePool); +class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 19, 21, AveragePool); #ifdef MLAS_F16VEC_INTRINSICS_SUPPORTED -class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 19, MLFloat16, AveragePool); +class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 19, 21, MLFloat16, AveragePool); #endif class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 19, 20, Cast); class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 19, 20, int32_t, @@ -1062,8 +1104,8 @@ class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 20, int8_t, ReduceMin); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 20, uint8_t, ReduceMin); class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 20, DFT); -class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 20, float, GridSample); -class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 20, double, GridSample); +class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 20, 21, float, GridSample); +class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 20, 21, double, GridSample); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 20, float, AffineGrid); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 20, double, AffineGrid); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 20, float, IsNaN); @@ -1125,6 +1167,56 @@ class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, class 
ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 21, Float8E5M2FNUZ, QuantizeLinear); #endif +// Opset 22 +class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 22, Acos); +class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 22, Cos); +class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 22, Tan); +class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 22, Asin); +class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 22, Atan); +class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 22, Sinh); +class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 22, Cosh); +class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 22, Asinh); +class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 22, Acosh); +class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 22, Atanh); +class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 22, Conv); +class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 22, ConvTranspose); +class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 22, Det); +class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 22, float_float, Dropout); +class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 22, float_double, Dropout); +class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 22, double_float, Dropout); +class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 22, double_double, Dropout); +class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 22, float, GridSample); +class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 22, double, GridSample); +class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 22, Elu); +class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 22, EyeLike); +class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 22, GlobalAveragePool); +class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 22, GlobalMaxPool); +class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 22, GRU); +class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 22, LSTM); +class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 22, RNN); +class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 22, HardSigmoid); +class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 22, InstanceNormalization); +class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 22, LpPool); +class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 22, MaxPool); +class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 22, MaxUnpool); +class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 22, Softplus); +class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 22, float, Round); +class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 22, double, Round); +class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 22, MLFloat16, Round); +class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 22, Selu); +class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 
22, float, Sin); +class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 22, double, Sin); +class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 22, Softsign); +class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 22, ThresholdedRelu); +class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 22, AveragePool); + +#ifdef MLAS_F16VEC_INTRINSICS_SUPPORTED +class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 22, MLFloat16, Conv); +class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 22, MLFloat16, GlobalAveragePool); +class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 22, MLFloat16, MaxPool); +class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 22, MLFloat16, AveragePool); +#endif + // !!PLEASE READ BELOW!! Following that, add new entries above this comment /* *** IMPORTANT! *** @@ -1168,21 +1260,21 @@ Status RegisterOnnxOperatorKernels(KernelRegistry& kernel_registry) { static const BuildKernelCreateInfoFn function_table[] = { BuildKernelCreateInfo, // default entry to avoid the list become empty after ops-reducing BuildKernelCreateInfo, - BuildKernelCreateInfo, - BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, - BuildKernelCreateInfo, + BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, - BuildKernelCreateInfo, - BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, + BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, - BuildKernelCreateInfo, - BuildKernelCreateInfo, - BuildKernelCreateInfo, - BuildKernelCreateInfo, - BuildKernelCreateInfo, - BuildKernelCreateInfo, - BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, - BuildKernelCreateInfo, + BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, - BuildKernelCreateInfo, - BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, @@ -1567,7 +1677,7 @@ Status RegisterOnnxOperatorKernels(KernelRegistry& kernel_registry) { int32_t, Less)>, BuildKernelCreateInfo, - BuildKernelCreateInfo, + BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, - BuildKernelCreateInfo, - BuildKernelCreateInfo, - BuildKernelCreateInfo, - BuildKernelCreateInfo, - BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, @@ -1678,7 +1788,7 @@ Status 
RegisterOnnxOperatorKernels(KernelRegistry& kernel_registry) { int8_t, Resize)>, BuildKernelCreateInfo, - BuildKernelCreateInfo, + BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, - BuildKernelCreateInfo, - BuildKernelCreateInfo, - BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, - BuildKernelCreateInfo, + BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, - BuildKernelCreateInfo, + BuildKernelCreateInfo, BuildKernelCreateInfo, - BuildKernelCreateInfo, - BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, @@ -1954,7 +2064,7 @@ Status RegisterOnnxOperatorKernels(KernelRegistry& kernel_registry) { BuildKernelCreateInfo, - BuildKernelCreateInfo, + BuildKernelCreateInfo, BuildKernelCreateInfo, @@ -2081,14 +2191,14 @@ Status RegisterOnnxOperatorKernels(KernelRegistry& kernel_registry) { BuildKernelCreateInfo, BuildKernelCreateInfo, - BuildKernelCreateInfo, - BuildKernelCreateInfo, - BuildKernelCreateInfo, - BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, @@ -2147,6 +2257,10 @@ Status RegisterOnnxOperatorKernels(KernelRegistry& kernel_registry) { int32_t, Add)>, BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, + BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, @@ -2367,20 +2494,44 @@ Status RegisterOnnxOperatorKernels(KernelRegistry& kernel_registry) { BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, - BuildKernelCreateInfo, - BuildKernelCreateInfo, - BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, // Opset 15 BuildKernelCreateInfo, @@ -2556,7 +2707,7 @@ Status RegisterOnnxOperatorKernels(KernelRegistry& kernel_registry) { ReduceSumSquare)>, BuildKernelCreateInfo, - 
BuildKernelCreateInfo, + BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, @@ -2633,7 +2784,7 @@ Status RegisterOnnxOperatorKernels(KernelRegistry& kernel_registry) { // Opset 19 BuildKernelCreateInfo, - BuildKernelCreateInfo, + BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, @@ -2708,10 +2859,10 @@ Status RegisterOnnxOperatorKernels(KernelRegistry& kernel_registry) { BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, - BuildKernelCreateInfo, - BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, #endif + + // Opset 22 + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, }; for (auto& function_table_entry : function_table) { @@ -2819,18 +3018,26 @@ Status RegisterOnnxOperatorKernels(KernelRegistry& kernel_registry) { Status RegisterFp16Kernels(KernelRegistry& kernel_registry) { static const BuildKernelCreateInfoFn function_table[] = { BuildKernelCreateInfo, // default entry to avoid the list become empty after ops-reducing - BuildKernelCreateInfo, + BuildKernelCreateInfo, - BuildKernelCreateInfo, + BuildKernelCreateInfo, BuildKernelCreateInfo, - BuildKernelCreateInfo, + BuildKernelCreateInfo, BuildKernelCreateInfo, - BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo::~ElementWiseRangedTransform() { Status Init(const onnxruntime::NodeAttributes& attributes) { \ return (GetFloatParam(#X, attributes, X)); \ } \ - GSL_SUPPRESS(r.11) \ + GSL_SUPPRESS(r.11) \ ElementWiseRangedTransform* Copy() const final { \ using T1 = typename std::remove_pointer::type; \ using T2 = typename std::remove_const::type; \ @@ -71,7 +71,7 @@ ElementWiseRangedTransform::~ElementWiseRangedTransform() { ORT_RETURN_IF_ERROR(GetFloatParam(#Y, attributes, Y)); \ return Status::OK(); \ } \ - GSL_SUPPRESS(r.11) \ + GSL_SUPPRESS(r.11) \ ElementWiseRangedTransform* Copy() const final { \ using T1 = typename std::remove_pointer::type; \ using T2 = typename std::remove_const::type; \ diff --git a/onnxruntime/core/providers/cpu/fp16/fp16_conv.cc b/onnxruntime/core/providers/cpu/fp16/fp16_conv.cc index 37db095e92570..87558ef3f2505 100644 --- a/onnxruntime/core/providers/cpu/fp16/fp16_conv.cc +++ b/onnxruntime/core/providers/cpu/fp16/fp16_conv.cc @@ -596,9 +596,15 @@ Status FusedConvFp16::Compute(OpKernelContext* context) const { // Operator definitions // +ONNX_CPU_OPERATOR_VERSIONED_TYPED_KERNEL( + Conv, + 11, 21, MLFloat16, + KernelDefBuilder().TypeConstraint("T", 
DataTypeImpl::GetTensorType()), + FusedConvFp16); + ONNX_CPU_OPERATOR_TYPED_KERNEL( Conv, - 11, + 22, MLFloat16, KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), FusedConvFp16); diff --git a/onnxruntime/core/providers/cpu/fp16/fp16_pool.cc b/onnxruntime/core/providers/cpu/fp16/fp16_pool.cc index 7c1e05f7ce277..9729500d0e1ff 100644 --- a/onnxruntime/core/providers/cpu/fp16/fp16_pool.cc +++ b/onnxruntime/core/providers/cpu/fp16/fp16_pool.cc @@ -224,9 +224,16 @@ ONNX_CPU_OPERATOR_VERSIONED_TYPED_KERNEL( KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), PoolFp16); +ONNX_CPU_OPERATOR_VERSIONED_TYPED_KERNEL( + MaxPool, + 12, 21, + MLFloat16, + KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), + PoolFp16); + ONNX_CPU_OPERATOR_TYPED_KERNEL( MaxPool, - 12, + 22, MLFloat16, KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), PoolFp16); @@ -237,16 +244,30 @@ ONNX_CPU_OPERATOR_VERSIONED_TYPED_KERNEL( KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), PoolFp16); +ONNX_CPU_OPERATOR_VERSIONED_TYPED_KERNEL( + AveragePool, + 19, 21, + MLFloat16, + KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), + PoolFp16); + ONNX_CPU_OPERATOR_TYPED_KERNEL( AveragePool, - 19, + 22, + MLFloat16, + KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), + PoolFp16); + +ONNX_CPU_OPERATOR_VERSIONED_TYPED_KERNEL( + GlobalAveragePool, + 1, 21, MLFloat16, KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), PoolFp16); ONNX_CPU_OPERATOR_TYPED_KERNEL( GlobalAveragePool, - 1, + 22, MLFloat16, KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), PoolFp16); diff --git a/onnxruntime/core/providers/cpu/math/det.cc b/onnxruntime/core/providers/cpu/math/det.cc index b32b44cf942c8..dc3f11d84393f 100644 --- a/onnxruntime/core/providers/cpu/math/det.cc +++ b/onnxruntime/core/providers/cpu/math/det.cc @@ -13,9 +13,17 @@ using namespace onnxruntime::common; namespace onnxruntime { -ONNX_CPU_OPERATOR_KERNEL( +ONNX_CPU_OPERATOR_VERSIONED_KERNEL( Det, 11, + 21, + KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), + Det); + +// Opset 22 starts to support bfloat16 +ONNX_CPU_OPERATOR_KERNEL( + Det, + 22, KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), Det); diff --git a/onnxruntime/core/providers/cpu/math/element_wise_ops.cc b/onnxruntime/core/providers/cpu/math/element_wise_ops.cc index a78ff69e5c894..fde471963f663 100644 --- a/onnxruntime/core/providers/cpu/math/element_wise_ops.cc +++ b/onnxruntime/core/providers/cpu/math/element_wise_ops.cc @@ -157,53 +157,93 @@ REG_ELEMENTWISE_VERSIONED_TYPED_KERNEL(Add, 7, 12, float, Add); REG_ELEMENTWISE_VERSIONED_TYPED_KERNEL(Add, 7, 12, double, Add); REG_ELEMENTWISE_VERSIONED_TYPED_KERNEL(Add, 7, 12, int32_t, Add); REG_ELEMENTWISE_VERSIONED_TYPED_KERNEL(Add, 7, 12, int64_t, Add); +REG_ELEMENTWISE_VERSIONED_TYPED_KERNEL(Add, 7, 12, uint32_t, Add); +REG_ELEMENTWISE_VERSIONED_TYPED_KERNEL(Add, 7, 12, uint64_t, Add); REG_ELEMENTWISE_VERSIONED_TYPED_KERNEL(Add, 13, 13, float, Add); REG_ELEMENTWISE_VERSIONED_TYPED_KERNEL(Add, 13, 13, double, Add); REG_ELEMENTWISE_VERSIONED_TYPED_KERNEL(Add, 13, 13, int32_t, Add); REG_ELEMENTWISE_VERSIONED_TYPED_KERNEL(Add, 13, 13, int64_t, Add); +REG_ELEMENTWISE_VERSIONED_TYPED_KERNEL(Add, 13, 13, uint32_t, Add); +REG_ELEMENTWISE_VERSIONED_TYPED_KERNEL(Add, 13, 13, uint64_t, Add); REG_ELEMENTWISE_TYPED_KERNEL(Add, 14, float, Add); REG_ELEMENTWISE_TYPED_KERNEL(Add, 
14, double, Add); +REG_ELEMENTWISE_TYPED_KERNEL(Add, 14, int8_t, Add); +REG_ELEMENTWISE_TYPED_KERNEL(Add, 14, int16_t, Add); REG_ELEMENTWISE_TYPED_KERNEL(Add, 14, int32_t, Add); REG_ELEMENTWISE_TYPED_KERNEL(Add, 14, int64_t, Add); +REG_ELEMENTWISE_TYPED_KERNEL(Add, 14, uint8_t, Add); +REG_ELEMENTWISE_TYPED_KERNEL(Add, 14, uint16_t, Add); +REG_ELEMENTWISE_TYPED_KERNEL(Add, 14, uint32_t, Add); +REG_ELEMENTWISE_TYPED_KERNEL(Add, 14, uint64_t, Add); REG_ELEMENTWISE_VERSIONED_TYPED_KERNEL(Sub, 7, 12, float, Sub); REG_ELEMENTWISE_VERSIONED_TYPED_KERNEL(Sub, 7, 12, double, Sub); REG_ELEMENTWISE_VERSIONED_TYPED_KERNEL(Sub, 7, 12, int32_t, Sub); REG_ELEMENTWISE_VERSIONED_TYPED_KERNEL(Sub, 7, 12, int64_t, Sub); +REG_ELEMENTWISE_VERSIONED_TYPED_KERNEL(Sub, 7, 12, uint32_t, Sub); +REG_ELEMENTWISE_VERSIONED_TYPED_KERNEL(Sub, 7, 12, uint64_t, Sub); REG_ELEMENTWISE_VERSIONED_TYPED_KERNEL(Sub, 13, 13, float, Sub); REG_ELEMENTWISE_VERSIONED_TYPED_KERNEL(Sub, 13, 13, double, Sub); REG_ELEMENTWISE_VERSIONED_TYPED_KERNEL(Sub, 13, 13, int32_t, Sub); REG_ELEMENTWISE_VERSIONED_TYPED_KERNEL(Sub, 13, 13, int64_t, Sub); +REG_ELEMENTWISE_VERSIONED_TYPED_KERNEL(Sub, 13, 13, uint32_t, Sub); +REG_ELEMENTWISE_VERSIONED_TYPED_KERNEL(Sub, 13, 13, uint64_t, Sub); REG_ELEMENTWISE_TYPED_KERNEL(Sub, 14, float, Sub); REG_ELEMENTWISE_TYPED_KERNEL(Sub, 14, double, Sub); +REG_ELEMENTWISE_TYPED_KERNEL(Sub, 14, int8_t, Sub); +REG_ELEMENTWISE_TYPED_KERNEL(Sub, 14, int16_t, Sub); REG_ELEMENTWISE_TYPED_KERNEL(Sub, 14, int32_t, Sub); REG_ELEMENTWISE_TYPED_KERNEL(Sub, 14, int64_t, Sub); +REG_ELEMENTWISE_TYPED_KERNEL(Sub, 14, uint8_t, Sub); +REG_ELEMENTWISE_TYPED_KERNEL(Sub, 14, uint16_t, Sub); +REG_ELEMENTWISE_TYPED_KERNEL(Sub, 14, uint32_t, Sub); +REG_ELEMENTWISE_TYPED_KERNEL(Sub, 14, uint64_t, Sub); REG_ELEMENTWISE_VERSIONED_TYPED_KERNEL(Mul, 7, 12, float, Mul); REG_ELEMENTWISE_VERSIONED_TYPED_KERNEL(Mul, 7, 12, double, Mul); REG_ELEMENTWISE_VERSIONED_TYPED_KERNEL(Mul, 7, 12, int32_t, Mul); REG_ELEMENTWISE_VERSIONED_TYPED_KERNEL(Mul, 7, 12, int64_t, Mul); +REG_ELEMENTWISE_VERSIONED_TYPED_KERNEL(Mul, 7, 12, uint32_t, Mul); +REG_ELEMENTWISE_VERSIONED_TYPED_KERNEL(Mul, 7, 12, uint64_t, Mul); REG_ELEMENTWISE_VERSIONED_TYPED_KERNEL(Mul, 13, 13, float, Mul); REG_ELEMENTWISE_VERSIONED_TYPED_KERNEL(Mul, 13, 13, double, Mul); REG_ELEMENTWISE_VERSIONED_TYPED_KERNEL(Mul, 13, 13, int32_t, Mul); REG_ELEMENTWISE_VERSIONED_TYPED_KERNEL(Mul, 13, 13, int64_t, Mul); +REG_ELEMENTWISE_VERSIONED_TYPED_KERNEL(Mul, 13, 13, uint32_t, Mul); +REG_ELEMENTWISE_VERSIONED_TYPED_KERNEL(Mul, 13, 13, uint64_t, Mul); REG_ELEMENTWISE_TYPED_KERNEL(Mul, 14, float, Mul); REG_ELEMENTWISE_TYPED_KERNEL(Mul, 14, double, Mul); +REG_ELEMENTWISE_TYPED_KERNEL(Mul, 14, int8_t, Mul); +REG_ELEMENTWISE_TYPED_KERNEL(Mul, 14, int16_t, Mul); REG_ELEMENTWISE_TYPED_KERNEL(Mul, 14, int32_t, Mul); REG_ELEMENTWISE_TYPED_KERNEL(Mul, 14, int64_t, Mul); +REG_ELEMENTWISE_TYPED_KERNEL(Mul, 14, uint8_t, Mul); +REG_ELEMENTWISE_TYPED_KERNEL(Mul, 14, uint16_t, Mul); +REG_ELEMENTWISE_TYPED_KERNEL(Mul, 14, uint32_t, Mul); +REG_ELEMENTWISE_TYPED_KERNEL(Mul, 14, uint64_t, Mul); REG_ELEMENTWISE_VERSIONED_TYPED_KERNEL(Div, 7, 12, float, Div); REG_ELEMENTWISE_VERSIONED_TYPED_KERNEL(Div, 7, 12, double, Div); REG_ELEMENTWISE_VERSIONED_TYPED_KERNEL(Div, 7, 12, int32_t, Div); REG_ELEMENTWISE_VERSIONED_TYPED_KERNEL(Div, 7, 12, int64_t, Div); +REG_ELEMENTWISE_VERSIONED_TYPED_KERNEL(Div, 7, 12, uint32_t, Div); +REG_ELEMENTWISE_VERSIONED_TYPED_KERNEL(Div, 7, 12, uint64_t, Div); 
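[Editor's note] Conceptually, each REG_ELEMENTWISE_TYPED_KERNEL line above boils down to one explicit instantiation of the same scalar loop per element type, which is why the unsigned types each need their own registration; with the uint32_t/uint64_t entries added here, the CPU EP now accepts, for example, uint64 Div from opset 7 onward. A minimal illustrative sketch (ORT's real kernels also broadcast and vectorize; these names are invented):

    #include <cstddef>
    #include <cstdint>

    template <typename T>
    void DivKernel(const T* a, const T* b, T* out, size_t n) {
        for (size_t i = 0; i < n; ++i) {
            out[i] = a[i] / b[i];  // integer division truncates; caller guards b[i] != 0
        }
    }

    // Explicit instantiations mirroring the new registrations:
    template void DivKernel<uint32_t>(const uint32_t*, const uint32_t*, uint32_t*, size_t);
    template void DivKernel<uint64_t>(const uint64_t*, const uint64_t*, uint64_t*, size_t);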
REG_ELEMENTWISE_VERSIONED_TYPED_KERNEL(Div, 13, 13, float, Div); REG_ELEMENTWISE_VERSIONED_TYPED_KERNEL(Div, 13, 13, double, Div); REG_ELEMENTWISE_VERSIONED_TYPED_KERNEL(Div, 13, 13, int32_t, Div); REG_ELEMENTWISE_VERSIONED_TYPED_KERNEL(Div, 13, 13, int64_t, Div); +REG_ELEMENTWISE_VERSIONED_TYPED_KERNEL(Div, 13, 13, uint32_t, Div); +REG_ELEMENTWISE_VERSIONED_TYPED_KERNEL(Div, 13, 13, uint64_t, Div); REG_ELEMENTWISE_TYPED_KERNEL(Div, 14, float, Div); REG_ELEMENTWISE_TYPED_KERNEL(Div, 14, double, Div); +REG_ELEMENTWISE_TYPED_KERNEL(Div, 14, int8_t, Div); +REG_ELEMENTWISE_TYPED_KERNEL(Div, 14, int16_t, Div); REG_ELEMENTWISE_TYPED_KERNEL(Div, 14, int32_t, Div); REG_ELEMENTWISE_TYPED_KERNEL(Div, 14, int64_t, Div); +REG_ELEMENTWISE_TYPED_KERNEL(Div, 14, uint8_t, Div); +REG_ELEMENTWISE_TYPED_KERNEL(Div, 14, uint16_t, Div); +REG_ELEMENTWISE_TYPED_KERNEL(Div, 14, uint32_t, Div); +REG_ELEMENTWISE_TYPED_KERNEL(Div, 14, uint64_t, Div); REG_ELEMENTWISE_VERSIONED_TYPED_KERNEL(Abs, 6, 12, float, Abs); REG_ELEMENTWISE_VERSIONED_TYPED_KERNEL(Abs, 6, 12, double, Abs); @@ -230,11 +270,13 @@ REG_ELEMENTWISE_TYPED_KERNEL(Abs, 13, uint64_t, Abs); REG_ELEMENTWISE_VERSIONED_TYPED_KERNEL(Neg, 6, 12, float, Neg); REG_ELEMENTWISE_VERSIONED_TYPED_KERNEL(Neg, 6, 12, double, Neg); REG_ELEMENTWISE_VERSIONED_TYPED_KERNEL(Neg, 6, 12, int8_t, Neg); +REG_ELEMENTWISE_VERSIONED_TYPED_KERNEL(Neg, 6, 12, int16_t, Neg); REG_ELEMENTWISE_VERSIONED_TYPED_KERNEL(Neg, 6, 12, int32_t, Neg); REG_ELEMENTWISE_VERSIONED_TYPED_KERNEL(Neg, 6, 12, int64_t, Neg); REG_ELEMENTWISE_TYPED_KERNEL(Neg, 13, float, Neg); REG_ELEMENTWISE_TYPED_KERNEL(Neg, 13, double, Neg); REG_ELEMENTWISE_TYPED_KERNEL(Neg, 13, int8_t, Neg); +REG_ELEMENTWISE_TYPED_KERNEL(Neg, 13, int16_t, Neg); REG_ELEMENTWISE_TYPED_KERNEL(Neg, 13, int32_t, Neg); REG_ELEMENTWISE_TYPED_KERNEL(Neg, 13, int64_t, Neg); @@ -1337,16 +1379,30 @@ class Sin final : public OpKernel { } }; +ONNX_CPU_OPERATOR_VERSIONED_TYPED_KERNEL( + Sin, + 7, 21, + float, + KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType<float>()), + Sin); + +ONNX_CPU_OPERATOR_VERSIONED_TYPED_KERNEL( + Sin, + 7, 21, + double, + KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType<double>()), + Sin); + ONNX_CPU_OPERATOR_TYPED_KERNEL( Sin, - 7, + 22, float, KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType<float>()), Sin); ONNX_CPU_OPERATOR_TYPED_KERNEL( Sin, - 7, + 22, double, KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType<double>()), Sin); @@ -1365,9 +1421,17 @@ class Cos final : public OpKernel { } }; -ONNX_CPU_OPERATOR_KERNEL( +ONNX_CPU_OPERATOR_VERSIONED_KERNEL( Cos, 7, + 21, + KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType<float>()), + Cos); + +// Opset 22 starts to support bfloat16 +ONNX_CPU_OPERATOR_KERNEL( + Cos, + 22, KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType<float>()), Cos); @@ -1385,9 +1449,15 @@ class Tan final : public OpKernel { } }; +ONNX_CPU_OPERATOR_VERSIONED_KERNEL( + Tan, + 7, 21, + KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType<float>()), + Tan); + ONNX_CPU_OPERATOR_KERNEL( Tan, - 7, + 22, KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType<float>()), Tan); @@ -1405,9 +1475,17 @@ class Asin final : public OpKernel { } }; -ONNX_CPU_OPERATOR_KERNEL( +ONNX_CPU_OPERATOR_VERSIONED_KERNEL( Asin, 7, + 21, + KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType<float>()), + Asin); + +// Opset 22 starts to support bfloat16 +ONNX_CPU_OPERATOR_KERNEL( + Asin, + 22, KernelDefBuilder().TypeConstraint("T", 
DataTypeImpl::GetTensorType<float>()), Asin); @@ -1425,9 +1503,17 @@ class Acos final : public OpKernel { } }; -ONNX_CPU_OPERATOR_KERNEL( +ONNX_CPU_OPERATOR_VERSIONED_KERNEL( Acos, 7, + 21, + KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType<float>()), + Acos); + +// Opset 22 starts to support bfloat16 +ONNX_CPU_OPERATOR_KERNEL( + Acos, + 22, KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType<float>()), Acos); @@ -1445,9 +1531,17 @@ class Atan final : public OpKernel { } }; -ONNX_CPU_OPERATOR_KERNEL( +ONNX_CPU_OPERATOR_VERSIONED_KERNEL( Atan, 7, + 21, + KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType<float>()), + Atan); + +// Opset 22 starts to support bfloat16 +ONNX_CPU_OPERATOR_KERNEL( + Atan, + 22, KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType<float>()), Atan); @@ -1465,9 +1559,17 @@ class Sinh final : public OpKernel { } }; -ONNX_CPU_OPERATOR_KERNEL( +ONNX_CPU_OPERATOR_VERSIONED_KERNEL( Sinh, 9, + 21, + KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType<float>()), + Sinh); + +// Opset 22 starts to support bfloat16 +ONNX_CPU_OPERATOR_KERNEL( + Sinh, + 22, KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType<float>()), Sinh); @@ -1485,9 +1587,17 @@ class Cosh final : public OpKernel { } }; -ONNX_CPU_OPERATOR_KERNEL( +ONNX_CPU_OPERATOR_VERSIONED_KERNEL( Cosh, 9, + 21, + KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType<float>()), + Cosh); + +// Opset 22 starts to support bfloat16 +ONNX_CPU_OPERATOR_KERNEL( + Cosh, + 22, KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType<float>()), Cosh); @@ -1517,9 +1627,17 @@ class Asinh final : public OpKernel { ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(Asinh); }; -ONNX_CPU_OPERATOR_KERNEL( +ONNX_CPU_OPERATOR_VERSIONED_KERNEL( Asinh, 9, + 21, + KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType<float>()), + Asinh); + +// Opset 22 starts to support bfloat16 +ONNX_CPU_OPERATOR_KERNEL( + Asinh, + 22, KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType<float>()), Asinh); @@ -1549,9 +1667,17 @@ class Acosh final : public OpKernel { ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(Acosh); }; -ONNX_CPU_OPERATOR_KERNEL( +ONNX_CPU_OPERATOR_VERSIONED_KERNEL( Acosh, 9, + 21, + KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType<float>()), + Acosh); + +// Opset 22 starts to support bfloat16 +ONNX_CPU_OPERATOR_KERNEL( + Acosh, + 22, KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType<float>()), Acosh); @@ -1581,9 +1707,17 @@ class Atanh final : public OpKernel { ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(Atanh); }; -ONNX_CPU_OPERATOR_KERNEL( +ONNX_CPU_OPERATOR_VERSIONED_KERNEL( Atanh, 9, + 21, + KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType<float>()), + Atanh); + +// Opset 22 starts to support bfloat16 +ONNX_CPU_OPERATOR_KERNEL( + Atanh, + 22, KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType<float>()), Atanh); diff --git a/onnxruntime/core/providers/cpu/math/round.cc b/onnxruntime/core/providers/cpu/math/round.cc index 86c2b91c10535..86be7cce43e9b 100644 --- a/onnxruntime/core/providers/cpu/math/round.cc +++ b/onnxruntime/core/providers/cpu/math/round.cc @@ -12,9 +12,13 @@ namespace onnxruntime { -ONNX_CPU_OPERATOR_TYPED_KERNEL(Round, 11, MLFloat16, KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType<MLFloat16>()), Round); -ONNX_CPU_OPERATOR_TYPED_KERNEL(Round, 11, float, KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType<float>()), Round); -ONNX_CPU_OPERATOR_TYPED_KERNEL(Round, 11, double, KernelDefBuilder().TypeConstraint("T", 
DataTypeImpl::GetTensorType<double>()), Round); +ONNX_CPU_OPERATOR_VERSIONED_TYPED_KERNEL(Round, 11, 21, MLFloat16, KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType<MLFloat16>()), Round); +ONNX_CPU_OPERATOR_VERSIONED_TYPED_KERNEL(Round, 11, 21, float, KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType<float>()), Round); +ONNX_CPU_OPERATOR_VERSIONED_TYPED_KERNEL(Round, 11, 21, double, KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType<double>()), Round); + +ONNX_CPU_OPERATOR_TYPED_KERNEL(Round, 22, MLFloat16, KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType<MLFloat16>()), Round); +ONNX_CPU_OPERATOR_TYPED_KERNEL(Round, 22, float, KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType<float>()), Round); +ONNX_CPU_OPERATOR_TYPED_KERNEL(Round, 22, double, KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType<double>()), Round); template <typename T> Status Round<T>::Compute(OpKernelContext* ctx) const { diff --git a/onnxruntime/core/providers/cpu/nn/Unpool.cc b/onnxruntime/core/providers/cpu/nn/Unpool.cc index fc5744e24cea6..5997dcedebfd7 100644 --- a/onnxruntime/core/providers/cpu/nn/Unpool.cc +++ b/onnxruntime/core/providers/cpu/nn/Unpool.cc @@ -23,9 +23,17 @@ ONNX_CPU_OPERATOR_VERSIONED_KERNEL( .TypeConstraint("T2", DataTypeImpl::GetTensorType<int64_t>()), MaxUnpool); +ONNX_CPU_OPERATOR_VERSIONED_KERNEL( + MaxUnpool, + 11, 21, + KernelDefBuilder() + .TypeConstraint("T1", DataTypeImpl::GetTensorType<float>()) + .TypeConstraint("T2", DataTypeImpl::GetTensorType<int64_t>()), + MaxUnpool); + ONNX_CPU_OPERATOR_KERNEL( MaxUnpool, - 11, + 22, KernelDefBuilder() .TypeConstraint("T1", DataTypeImpl::GetTensorType<float>()) .TypeConstraint("T2", DataTypeImpl::GetTensorType<int64_t>()), MaxUnpool); diff --git a/onnxruntime/core/providers/cpu/nn/batch_norm.h b/onnxruntime/core/providers/cpu/nn/batch_norm.h index be9bc3368ea41..1febfac86dc30 100644 --- a/onnxruntime/core/providers/cpu/nn/batch_norm.h +++ b/onnxruntime/core/providers/cpu/nn/batch_norm.h @@ -75,9 +75,11 @@ class BatchNorm : public OpKernel { const TensorShape& x_shape = X->Shape(); Tensor* Y = p_op_kernel_context->Output(0, x_shape); + // X shape is [N, C, D1, D2, ... Dn], but it can also be 1-D according to onnx spec: + // "The op also accepts single dimension input of size N in which case C is assumed to be 1" const auto& dims_vec = x_shape.GetDims(); const size_t N = onnxruntime::narrow<size_t>(dims_vec[0]); - const size_t C = onnxruntime::narrow<size_t>(dims_vec[1]); // assume NCHW as per the spec + const size_t C = dims_vec.size() == 1 ? 1 : onnxruntime::narrow<size_t>(dims_vec[1]); // calculate sample_size (per individual channel) size_t sample_size = 1; diff --git a/onnxruntime/core/providers/cpu/nn/batch_norm_helper.h b/onnxruntime/core/providers/cpu/nn/batch_norm_helper.h index ccecbabfa3db3..b5aa522f718e2 100644 --- a/onnxruntime/core/providers/cpu/nn/batch_norm_helper.h +++ b/onnxruntime/core/providers/cpu/nn/batch_norm_helper.h @@ -28,6 +28,9 @@ class BatchNormHelper { // NHWC dependent shape: X // All other shapes are assumed to be in NCHW layout? const auto& x_dims = X->Shape().GetDims(); + if (x_dims.size() < 1) { + return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "Invalid input X: NumDimensions() < 1"); + } // If x_dims size < 2, num_channels defaults to 1. 
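// ---------------------------------------------------------------------------
// Minimal sketch of the channel rule the two BatchNorm changes above encode
// (function name and shapes below are hypothetical): an empty shape is now
// rejected up front, and a 1-D input of size N is accepted with C taken as 1
// per the ONNX spec.
#include <cstddef>
#include <cstdint>
#include <vector>

inline size_t ChannelCount(const std::vector<int64_t>& dims) {
  if (dims.empty()) return 0;  // corresponds to the new INVALID_ARGUMENT path
  return dims.size() == 1 ? size_t{1} : static_cast<size_t>(dims[1]);
}
// ChannelCount({8}) == 1 (1-D input, C assumed 1); ChannelCount({2, 3, 4, 4}) == 3 (NCHW).
// ---------------------------------------------------------------------------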
int64_t num_channels; diff --git a/onnxruntime/core/providers/cpu/nn/conv.cc b/onnxruntime/core/providers/cpu/nn/conv.cc index 51dfb143fb916..d10213f55d5d4 100644 --- a/onnxruntime/core/providers/cpu/nn/conv.cc +++ b/onnxruntime/core/providers/cpu/nn/conv.cc @@ -300,9 +300,17 @@ ONNX_CPU_OPERATOR_VERSIONED_KERNEL( KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), Conv); -ONNX_CPU_OPERATOR_KERNEL( +ONNX_CPU_OPERATOR_VERSIONED_KERNEL( Conv, 11, + 21, + KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), + Conv); + +// Opset 22 starts to support bfloat16 +ONNX_CPU_OPERATOR_KERNEL( + Conv, + 22, KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), Conv); diff --git a/onnxruntime/core/providers/cpu/nn/conv_transpose.cc b/onnxruntime/core/providers/cpu/nn/conv_transpose.cc index f0c1b0b409831..4e0b560e2ec3c 100644 --- a/onnxruntime/core/providers/cpu/nn/conv_transpose.cc +++ b/onnxruntime/core/providers/cpu/nn/conv_transpose.cc @@ -30,9 +30,17 @@ ONNX_CPU_OPERATOR_VERSIONED_KERNEL( KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), ConvTranspose); -ONNX_CPU_OPERATOR_KERNEL( +ONNX_CPU_OPERATOR_VERSIONED_KERNEL( ConvTranspose, 11, + 21, + KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), + ConvTranspose); + +// Opset 22 starts to support bfloat16 +ONNX_CPU_OPERATOR_KERNEL( + ConvTranspose, + 22, KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), ConvTranspose); diff --git a/onnxruntime/core/providers/cpu/nn/dropout_op.cc b/onnxruntime/core/providers/cpu/nn/dropout_op.cc index 8b5ce947577bf..fb475ebc0e99f 100644 --- a/onnxruntime/core/providers/cpu/nn/dropout_op.cc +++ b/onnxruntime/core/providers/cpu/nn/dropout_op.cc @@ -19,12 +19,10 @@ namespace onnxruntime { Dropout); #define REGISTER_KERNEL_TYPED(OpName, VER, T1, T2) \ - ONNX_OPERATOR_TYPED_KERNEL_EX( \ + ONNX_CPU_OPERATOR_TYPED_KERNEL( \ OpName, \ - kOnnxDomain, \ VER, \ T1##_##T2, \ - kCpuExecutionProvider, \ KernelDefBuilder() \ .TypeConstraint("T", DataTypeImpl::GetTensorType()) \ .TypeConstraint("T1", DataTypeImpl::GetTensorType()) \ @@ -42,8 +40,14 @@ REGISTER_KERNEL_VERSIONED_TYPED(Dropout, 12, 12, float, double) REGISTER_KERNEL_VERSIONED_TYPED(Dropout, 12, 12, double, float) REGISTER_KERNEL_VERSIONED_TYPED(Dropout, 12, 12, double, double) -REGISTER_KERNEL_TYPED(Dropout, 13, float, float) -REGISTER_KERNEL_TYPED(Dropout, 13, float, double) -REGISTER_KERNEL_TYPED(Dropout, 13, double, float) -REGISTER_KERNEL_TYPED(Dropout, 13, double, double) +REGISTER_KERNEL_VERSIONED_TYPED(Dropout, 13, 21, float, float) +REGISTER_KERNEL_VERSIONED_TYPED(Dropout, 13, 21, float, double) +REGISTER_KERNEL_VERSIONED_TYPED(Dropout, 13, 21, double, float) +REGISTER_KERNEL_VERSIONED_TYPED(Dropout, 13, 21, double, double) + +// Opset 22 supports BFloat16 +REGISTER_KERNEL_TYPED(Dropout, 22, float, float) +REGISTER_KERNEL_TYPED(Dropout, 22, float, double) +REGISTER_KERNEL_TYPED(Dropout, 22, double, float) +REGISTER_KERNEL_TYPED(Dropout, 22, double, double) } // namespace onnxruntime diff --git a/onnxruntime/core/providers/cpu/nn/instance_norm.cc b/onnxruntime/core/providers/cpu/nn/instance_norm.cc index 4eee9a8409cae..5d4e01808143f 100644 --- a/onnxruntime/core/providers/cpu/nn/instance_norm.cc +++ b/onnxruntime/core/providers/cpu/nn/instance_norm.cc @@ -8,9 +8,16 @@ using namespace ::onnxruntime::common; namespace onnxruntime { -ONNX_CPU_OPERATOR_KERNEL( +ONNX_CPU_OPERATOR_VERSIONED_KERNEL( InstanceNormalization, 6, + 21, + 
KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), + InstanceNorm); + +ONNX_CPU_OPERATOR_KERNEL( + InstanceNormalization, + 22, KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), InstanceNorm); diff --git a/onnxruntime/core/providers/cpu/nn/layer_norm_helper.h b/onnxruntime/core/providers/cpu/nn/layer_norm_helper.h new file mode 100644 index 0000000000000..ed5ea83d9de30 --- /dev/null +++ b/onnxruntime/core/providers/cpu/nn/layer_norm_helper.h @@ -0,0 +1,116 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#pragma once + +#include "core/framework/tensor_shape.h" +#include "core/common/status.h" +#include "core/common/narrow.h" + +namespace onnxruntime { + +constexpr const char* kLayerNormInputShapeMismatchError = + "Size of scale and bias (if provided) must match X.shape[axis:], " + "or scale and bias (with same shape) can be broadcasted to X when axis is 2."; + +constexpr const char* kLayerNormInvalidSize = "Size of X.shape[axis:] must be larger than 1, got "; + +constexpr int64_t kLayerNormInvalidInput = -1; + +struct LayerNormParams { + int64_t num_rows; + int64_t norm_size; // size per row + int64_t scale_size; + int64_t bias_size; + int64_t broadcast_param; +}; + +// We support broadcasting for axis=2, where the first two dimensions are rows, and the rest are columns. +// When X shape is (B, S, ...), and x_row (index of one row in X) is in the range of [0, B * S). +// We support scale and bias shape like below: +// When scale and bias shape is (1, 1, ...) or (...), value of broadcast_param is 0. +// When scale and bias shape is (B, 1, ...), value of broadcast_param is S. +// When scale and bias shape is (B, S, ...), value of broadcast_param is 1. +// When scale and bias shape is (1, S, ...), value of broadcast_param is -S. + +// Below is a macro to compute the offset for scale and bias data for a row of X. +#ifndef LAYER_NORM_SCALE_BIAS_OFFSET +#define LAYER_NORM_SCALE_BIAS_OFFSET(broadcast_param, x_row, norm_size) \ + ((broadcast_param == 0) ? 0 \ + : norm_size * (broadcast_param > 0 ? x_row / broadcast_param : x_row % (-broadcast_param))) +#endif + +class LayerNormHelper { + public: + static Status CheckInputs(const TensorShape& x_shape, + const TensorShape& scale_shape, + const TensorShape& bias_shape, + bool has_bias, + int64_t axis, + LayerNormParams& params) { + params.num_rows = x_shape.SizeToDimension(onnxruntime::narrow(axis)); + params.norm_size = x_shape.SizeFromDimension(onnxruntime::narrow(axis)); + params.scale_size = scale_shape.Size(); + params.bias_size = bias_shape.Size(); + params.broadcast_param = 0; + + if (params.norm_size <= 1) { + params.broadcast_param = kLayerNormInvalidInput; + return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, kLayerNormInvalidSize, params.norm_size); + } else if (params.scale_size != params.norm_size || (has_bias && params.bias_size != params.scale_size)) { + params.broadcast_param = GetBroadcastParam(x_shape, scale_shape, has_bias ? 
&bias_shape : nullptr, axis); + if (params.broadcast_param == kLayerNormInvalidInput) { + return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, + kLayerNormInputShapeMismatchError, + " X.shape=", x_shape, + " scale.shape=", scale_shape, + " bias.shape=", bias_shape, + " and axis=", axis); + } + } + return Status::OK(); + } + + private: + static int64_t GetBroadcastParam(const TensorShape& x_shape, + const TensorShape& scale_shape, + const TensorShape* bias_shape, + int64_t axis) { + // Note that when size of scale and bias is norm_size, it won't enter this function (see CheckInputs). + + // X shape is (B, S, ...) + if (axis == 2 && + x_shape.NumDimensions() >= 3 && + x_shape.NumDimensions() == scale_shape.NumDimensions() && + (bias_shape == nullptr || *bias_shape == scale_shape)) { + for (size_t i = 2; i < x_shape.NumDimensions(); ++i) { + if (x_shape.GetDims()[i] != scale_shape.GetDims()[i]) { + // scale cannot be broadcasted to X. It is invalid input. + return kLayerNormInvalidInput; + } + } + + if (x_shape.GetDims()[0] == scale_shape.GetDims()[0]) { + // scale and bias shape is (B, S, ...). + if (x_shape.GetDims()[1] == scale_shape.GetDims()[1]) { + return 1; + } + + // scale and bias shape is (B, 1, ...), returns S + if (scale_shape.GetDims()[1] == 1) { + return x_shape.GetDims()[1]; + } + } else if (scale_shape.GetDims()[0] == 1) { + // scale and bias shape is (1, S, ...), returns -S + if (x_shape.GetDims()[1] == scale_shape.GetDims()[1]) { + return -(x_shape.GetDims()[1]); + } + } + } + + // Other cases that are not supported. + return kLayerNormInvalidInput; + } +}; + +} // namespace onnxruntime diff --git a/onnxruntime/core/providers/cpu/nn/layer_norm_impl.cc b/onnxruntime/core/providers/cpu/nn/layer_norm_impl.cc index 24a5dcab225c4..9a6295def4de0 100644 --- a/onnxruntime/core/providers/cpu/nn/layer_norm_impl.cc +++ b/onnxruntime/core/providers/cpu/nn/layer_norm_impl.cc @@ -2,6 +2,7 @@ // Licensed under the MIT License. #include "layer_norm_impl.h" +#include "layer_norm_helper.h" #include "core/common/safeint.h" #include "core/framework/tensor.h" @@ -24,6 +25,7 @@ void ComputeJob( const T* bias_data, const ptrdiff_t task_idx, const int64_t norm_size, + const int64_t broadcast_param, const float* scale_float_ptr, const float* bias_float_ptr, float epsilon, @@ -55,13 +57,16 @@ void ComputeJob( mean_square = sqrt(mean_square / norm_size - mean * mean + epsilon); } - for (int64_t h = 0; h < norm_size; h++) { + // Compute the offset of gamma and beta to support broadcasting. 
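// ---------------------------------------------------------------------------
// Worked example of the LAYER_NORM_SCALE_BIAS_OFFSET arithmetic used just
// below (a standalone replica of the macro; B=2, S=3, norm_size=4 are
// illustrative values only):
#include <cstdint>

constexpr int64_t ScaleBiasOffset(int64_t broadcast_param, int64_t x_row, int64_t norm_size) {
  return (broadcast_param == 0)
             ? 0
             : norm_size * (broadcast_param > 0 ? x_row / broadcast_param
                                                : x_row % (-broadcast_param));
}

static_assert(ScaleBiasOffset(0, 5, 4) == 0, "scale (...) is shared by every row");
static_assert(ScaleBiasOffset(3, 5, 4) == 4, "scale (B,1,...): row 5 is in batch 1, offset 1*4");
static_assert(ScaleBiasOffset(1, 5, 4) == 20, "scale (B,S,...): each row has its own slice");
static_assert(ScaleBiasOffset(-3, 5, 4) == 8, "scale (1,S,...): row 5 has sequence index 2, offset 2*4");
// ---------------------------------------------------------------------------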
+ int64_t i = LAYER_NORM_SCALE_BIAS_OFFSET(broadcast_param, task_idx, norm_size); + + for (int64_t h = 0; h < norm_size; h++, i++) { if (simplified) { - p_output[h] = p_output[h] / mean_square * scale_data[h]; + p_output[h] = p_output[h] / mean_square * scale_data[i]; } else if (nullptr == bias_data) { - p_output[h] = (p_output[h] - mean) / mean_square * scale_data[h]; + p_output[h] = (p_output[h] - mean) / mean_square * scale_data[i]; } else { - p_output[h] = (p_output[h] - mean) / mean_square * scale_data[h] + bias_data[h]; + p_output[h] = (p_output[h] - mean) / mean_square * scale_data[i] + bias_data[i]; } } @@ -82,6 +87,7 @@ void ComputeJob( const MLFloat16* bias_data, const ptrdiff_t task_idx, const int64_t norm_size, + const int64_t broadcast_param, const float* scale_float_ptr, const float* bias_float_ptr, float epsilon, @@ -120,13 +126,16 @@ void ComputeJob( mean_square = sqrt(mean_square / norm_size - mean * mean + epsilon); } - for (size_t h = 0; h < num_elems; h++) { + // Compute the offset of gamma and beta to support broadcasting. + int64_t i = LAYER_NORM_SCALE_BIAS_OFFSET(broadcast_param, task_idx, norm_size); + + for (size_t h = 0; h < num_elems; h++, i++) { if (simplified) { - output_float_ptr[h] = output_float_ptr[h] / mean_square * scale_float_ptr[h]; + output_float_ptr[h] = output_float_ptr[h] / mean_square * scale_float_ptr[i]; } else if (nullptr == bias_float_ptr) { - output_float_ptr[h] = (output_float_ptr[h] - mean) / mean_square * scale_float_ptr[h]; + output_float_ptr[h] = (output_float_ptr[h] - mean) / mean_square * scale_float_ptr[i]; } else { - output_float_ptr[h] = (output_float_ptr[h] - mean) / mean_square * scale_float_ptr[h] + bias_float_ptr[h]; + output_float_ptr[h] = (output_float_ptr[h] - mean) / mean_square * scale_float_ptr[i] + bias_float_ptr[i]; } } @@ -161,9 +170,7 @@ LayerNormImpl::LayerNormImpl(const OpKernelInfo& op_kernel_info, bool simplified simplified_{simplified}, contrib_op_{contrib_op}, prepacked_scale_fp32_data_(nullptr), - prepacked_scale_fp32_size_(0), - prepacked_bias_fp32_data_(nullptr), - prepacked_bias_fp32_size_(0) { + prepacked_bias_fp32_data_(nullptr) { ORT_ENFORCE(op_kernel_info.GetAttr("axis", &axis_).IsOK()); ORT_ENFORCE(op_kernel_info.GetAttr("epsilon", &epsilon_).IsOK()); } @@ -179,8 +186,8 @@ Status LayerNormImpl::ComputeImpl(OpKernelContext* p_ctx, int64_t orig_axis, flo const T* bias_data = (simplified || nullptr == bias) ? nullptr : bias->Data(); const TensorShape& x_shape = X->Shape(); - size_t scale_size = scale ? static_cast(scale->Shape().Size()) : prepacked_scale_fp32_size_; - size_t bias_size = bias ? static_cast(bias->Shape().Size()) : prepacked_bias_fp32_size_; + const TensorShape& scale_shape = scale ? scale->Shape() : prepacked_scale_fp32_shape_; + const TensorShape& bias_shape = bias ? 
bias->Shape() : prepacked_bias_fp32_shape_; Tensor* Y = p_ctx->Output(0, x_shape); T* Y_data = Y->MutableData(); @@ -215,7 +222,7 @@ Status LayerNormImpl::ComputeImpl(OpKernelContext* p_ctx, int64_t orig_axis, flo AllocatorPtr alloc; ORT_RETURN_IF_ERROR(p_ctx->GetTempSpaceAllocator(&alloc)); - return ComputeWithoutContext(X_data, x_shape, scale_data, scale_size, bias_data, bias_size, Y_data, mean_data, + return ComputeWithoutContext(X_data, x_shape, scale_data, scale_shape, bias_data, bias_shape, Y_data, mean_data, inv_std_dev_data, thread_pool, axis, epsilon, simplified, alloc); } @@ -234,10 +241,10 @@ Status LayerNormImpl::PrePack(const Tensor& tensor, int input_idx, AllocatorPtr is_packed = false; if (input_idx == 1) { // scale - prepacked_scale_fp32_size_ = static_cast(tensor.Shape().Size()); + prepacked_scale_fp32_shape_ = tensor.Shape(); ConvertMLFloat16ToFloatIfNeeded(tensor, alloc, prepacked_scale_fp32_data_, is_packed); } else if (input_idx == 2) { // bias - prepacked_bias_fp32_size_ = static_cast(tensor.Shape().Size()); + prepacked_bias_fp32_shape_ = tensor.Shape(); ConvertMLFloat16ToFloatIfNeeded(tensor, alloc, prepacked_bias_fp32_data_, is_packed); } @@ -249,9 +256,9 @@ Status LayerNormImpl::ComputeWithoutContext( const T* X_data, const TensorShape& x_shape, const T* scale_data, - size_t scale_size, + const TensorShape& scale_shape, const T* bias_data, - size_t bias_size, + const TensorShape& bias_shape, T* Y_data, U* mean_data, U* inv_std_dev_data, @@ -260,35 +267,28 @@ Status LayerNormImpl::ComputeWithoutContext( float epsilon, bool simplified, AllocatorPtr alloc) const { - int64_t norm_count = x_shape.SizeToDimension(onnxruntime::narrow(axis)); - int64_t norm_size = x_shape.SizeFromDimension(onnxruntime::narrow(axis)); - - if (static_cast(scale_size) != norm_size || (bias_data && static_cast(bias_size) != norm_size)) { - return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, - "Size of X.shape()[axis:] == ", norm_size, - ". Size of scale and bias (if provided) must match this. Got scale size of ", - scale_size, " and bias size of ", bias_size); - } + LayerNormParams params; + ORT_RETURN_IF_ERROR(LayerNormHelper::CheckInputs(x_shape, scale_shape, bias_shape, bias_data != nullptr, axis, params)); IAllocatorUniquePtr scale_fp32; IAllocatorUniquePtr bias_fp32; if constexpr (std::is_same_v) { if (prepacked_scale_fp32_data_ == nullptr) { - const size_t num_elems = static_cast(norm_size); + const size_t num_elems = static_cast(params.scale_size); scale_fp32 = IAllocator::MakeUniquePtr(alloc, num_elems); MlasConvertHalfToFloatBuffer(scale_data, scale_fp32.get(), num_elems); } if (prepacked_bias_fp32_data_ == nullptr && bias_data) { - const size_t num_elems = static_cast(norm_size); + const size_t num_elems = static_cast(params.bias_size); bias_fp32 = IAllocator::MakeUniquePtr(alloc, num_elems); MlasConvertHalfToFloatBuffer(bias_data, bias_fp32.get(), num_elems); } } concurrency::ThreadPool::TryBatchParallelFor( - thread_pool, static_cast(norm_count), + thread_pool, static_cast(params.num_rows), [&](ptrdiff_t task_idx) { - ComputeJob(X_data, scale_data, bias_data, task_idx, norm_size, + ComputeJob(X_data, scale_data, bias_data, task_idx, params.norm_size, params.broadcast_param, prepacked_scale_fp32_data_ ? prepacked_scale_fp32_data_.get() : scale_fp32.get(), prepacked_bias_fp32_data_ ? 
prepacked_bias_fp32_data_.get() : bias_fp32.get(), epsilon, simplified, Y_data, mean_data, inv_std_dev_data, alloc); diff --git a/onnxruntime/core/providers/cpu/nn/layer_norm_impl.h b/onnxruntime/core/providers/cpu/nn/layer_norm_impl.h index f8b528b398cba..a2debb1679ebd 100644 --- a/onnxruntime/core/providers/cpu/nn/layer_norm_impl.h +++ b/onnxruntime/core/providers/cpu/nn/layer_norm_impl.h @@ -24,9 +24,9 @@ class LayerNormImpl : public OpKernel { const T* X_data, const TensorShape& x_shape, const T* scale_data, - size_t scale_size, + const TensorShape& scale_shape, const T* bias_data, - size_t bias_size, + const TensorShape& bias_shape, T* Y_data, U* mean_data, U* inv_std_dev, @@ -64,9 +64,9 @@ class LayerNormImpl : public OpKernel { const bool simplified_; const bool contrib_op_; IAllocatorUniquePtr prepacked_scale_fp32_data_; - size_t prepacked_scale_fp32_size_; + TensorShape prepacked_scale_fp32_shape_; IAllocatorUniquePtr prepacked_bias_fp32_data_; - size_t prepacked_bias_fp32_size_; + TensorShape prepacked_bias_fp32_shape_; }; } // namespace onnxruntime diff --git a/onnxruntime/core/providers/cpu/nn/pool.cc b/onnxruntime/core/providers/cpu/nn/pool.cc index 9230398680a64..d6b9ed693432b 100644 --- a/onnxruntime/core/providers/cpu/nn/pool.cc +++ b/onnxruntime/core/providers/cpu/nn/pool.cc @@ -399,29 +399,26 @@ Status LpPoolV18::Compute(OpKernelContext* context) const { return Status::OK(); } -ONNX_CPU_OPERATOR_VERSIONED_KERNEL(AveragePool, 7, 9, - KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), - Pool); - -ONNX_CPU_OPERATOR_VERSIONED_KERNEL(AveragePool, 10, 10, - KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), - Pool); - -ONNX_CPU_OPERATOR_VERSIONED_KERNEL(AveragePool, 11, 18, - KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), - Pool); - -ONNX_CPU_OPERATOR_KERNEL(AveragePool, 19, - KernelDefBuilder() - .TypeConstraint( - "T", - DataTypeImpl::GetTensorType()), - AveragePoolV19); - -ONNX_CPU_OPERATOR_VERSIONED_KERNEL(MaxPool, 1, 7, - KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), - Pool>); - +#define REGISTER_KERNEL_VERSIONED(OpName, START_VER, END_VER, ...) \ + ONNX_CPU_OPERATOR_VERSIONED_KERNEL( \ + OpName, \ + START_VER, \ + END_VER, \ + KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), __VA_ARGS__); + +#define REGISTER_KERNEL(OpName, VER, ...) 
\ + ONNX_CPU_OPERATOR_KERNEL( \ + OpName, \ + VER, \ + KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), __VA_ARGS__); + +REGISTER_KERNEL_VERSIONED(AveragePool, 7, 9, Pool); +REGISTER_KERNEL_VERSIONED(AveragePool, 10, 10, Pool); +REGISTER_KERNEL_VERSIONED(AveragePool, 11, 18, Pool); +REGISTER_KERNEL_VERSIONED(AveragePool, 19, 21, AveragePoolV19); +REGISTER_KERNEL(AveragePool, 22, AveragePoolV19); + +REGISTER_KERNEL_VERSIONED(MaxPool, 1, 7, Pool>); ONNX_CPU_OPERATOR_VERSIONED_KERNEL(MaxPool, 8, 11, KernelDefBuilder() .TypeConstraint( @@ -430,7 +427,14 @@ ONNX_CPU_OPERATOR_VERSIONED_KERNEL(MaxPool, 8, 11, .TypeConstraint("I", DataTypeImpl::GetTensorType()), MaxPoolV8); -ONNX_CPU_OPERATOR_KERNEL(MaxPool, 12, +ONNX_CPU_OPERATOR_VERSIONED_KERNEL(MaxPool, 12, 21, + KernelDefBuilder() + .TypeConstraint( + "T", + BuildKernelDefConstraintsFromTypeList()) + .TypeConstraint("I", DataTypeImpl::GetTensorType()), + MaxPoolV8); +ONNX_CPU_OPERATOR_KERNEL(MaxPool, 22, KernelDefBuilder() .TypeConstraint( "T", @@ -438,29 +442,17 @@ ONNX_CPU_OPERATOR_KERNEL(MaxPool, 12, .TypeConstraint("I", DataTypeImpl::GetTensorType()), MaxPoolV8); -ONNX_CPU_OPERATOR_VERSIONED_KERNEL(LpPool, 2, 10, - KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), - Pool); - -ONNX_CPU_OPERATOR_VERSIONED_KERNEL(LpPool, 11, 17, - KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), - Pool); - -ONNX_CPU_OPERATOR_KERNEL(LpPool, 18, - KernelDefBuilder() - .TypeConstraint( - "T", - DataTypeImpl::GetTensorType()), - LpPoolV18); +REGISTER_KERNEL_VERSIONED(LpPool, 2, 10, Pool); +REGISTER_KERNEL_VERSIONED(LpPool, 11, 17, Pool); +REGISTER_KERNEL_VERSIONED(LpPool, 18, 21, LpPoolV18); +REGISTER_KERNEL(LpPool, 22, LpPoolV18); -ONNX_CPU_OPERATOR_KERNEL(GlobalLpPool, 2, KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), - Pool); +REGISTER_KERNEL(GlobalLpPool, 2, Pool); -ONNX_CPU_OPERATOR_KERNEL(GlobalAveragePool, 1, - KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), - Pool); +REGISTER_KERNEL_VERSIONED(GlobalAveragePool, 1, 21, Pool); +REGISTER_KERNEL(GlobalAveragePool, 22, Pool); -ONNX_CPU_OPERATOR_KERNEL(GlobalMaxPool, 1, KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), - Pool>); +REGISTER_KERNEL_VERSIONED(GlobalMaxPool, 1, 21, Pool>); +REGISTER_KERNEL(GlobalMaxPool, 22, Pool>); } // namespace onnxruntime diff --git a/onnxruntime/core/providers/cpu/nn/pool_attributes.h b/onnxruntime/core/providers/cpu/nn/pool_attributes.h index 118cb4a3ba4bd..fbbd4273757d5 100644 --- a/onnxruntime/core/providers/cpu/nn/pool_attributes.h +++ b/onnxruntime/core/providers/cpu/nn/pool_attributes.h @@ -150,14 +150,14 @@ struct PoolAttributes { case AutoPadType::VALID: *pad_head = 0; *pad_tail = 0; - *out_size = ComputeOutputSize(in_size, stride, kernel, 0, dilation); + *out_size = ComputeOutputSize(in_size, stride, kernel, 0, 0, dilation); break; case AutoPadType::SAME_LOWER: { int64_t legacy_target_size = (in_size + stride - 1) / stride; int64_t pad_needed = (legacy_target_size - 1) * stride + kernel - in_size; *pad_head = (pad_needed + 1) / 2; *pad_tail = pad_needed - *pad_head; - *out_size = ComputeOutputSize(in_size, stride, kernel, pad_needed, dilation); + *out_size = ComputeOutputSize(in_size, stride, kernel, *pad_head, *pad_tail, dilation); break; } case AutoPadType::SAME_UPPER: { @@ -165,7 +165,7 @@ struct PoolAttributes { int64_t pad_needed = (legacy_target_size - 1) * stride + kernel - in_size; *pad_head = pad_needed / 2; *pad_tail = 
pad_needed - *pad_head; - *out_size = ComputeOutputSize(in_size, stride, kernel, pad_needed, dilation); + *out_size = ComputeOutputSize(in_size, stride, kernel, *pad_head, *pad_tail, dilation); break; } default: { @@ -173,7 +173,7 @@ struct PoolAttributes { } } } else { - *out_size = ComputeOutputSize(in_size, stride, kernel, *pad_head + *pad_tail, dilation); + *out_size = ComputeOutputSize(in_size, stride, kernel, *pad_head, *pad_tail, dilation); } } #if defined(_MSC_VER) && !defined(__clang__) @@ -184,13 +184,21 @@ struct PoolAttributes { int64_t ComputeOutputSize(int64_t in_size, int64_t stride, int64_t kernel, - int64_t pad_needed, + int64_t pad_head, + int64_t pad_tail, int64_t dilation) const { - if (ceil_mode == 0) { - return static_cast(static_cast(in_size + pad_needed - dilation * (kernel - 1) - 1) / stride + 1); + int64_t numerator = in_size + pad_head + pad_tail - dilation * (kernel - 1) - 1; + int64_t out_size = numerator / stride + 1; + + if (ceil_mode == 1) { + out_size = static_cast(std::ceil(static_cast(numerator) / stride)) + 1; + // Ensure that the last pooling starts inside the image (at least 1 pixel) + // Reference: https://github.com/onnx/onnx/pull/5741 + if ((out_size - 1) * stride >= in_size + pad_head) { + --out_size; + } } - return static_cast( - std::ceil(static_cast(in_size + pad_needed - dilation * (kernel - 1) - 1) / stride + 1)); + return out_size; } #if defined(_MSC_VER) && !defined(__clang__) #pragma warning(pop) diff --git a/onnxruntime/core/providers/cpu/nn/pool_functors.h b/onnxruntime/core/providers/cpu/nn/pool_functors.h index d3205278b72f6..476a9a0338969 100644 --- a/onnxruntime/core/providers/cpu/nn/pool_functors.h +++ b/onnxruntime/core/providers/cpu/nn/pool_functors.h @@ -406,6 +406,7 @@ struct AveragePool1DTask final { for (int64_t ph = 0; ph < pooled_height; ++ph) { int64_t hstart = ph * stride_h - pads[0]; int64_t hend = hstart + kernel_shape[0] * dilation_h; + hend = std::min(hend, height + pads[1]); y_d[ph] = 0; int total_elements = 0; for (int64_t h = hstart; h < hend; h += dilation_h) { @@ -461,9 +462,11 @@ struct AveragePool2DTask final { for (int64_t ph = 0; ph < pooled_height; ++ph) { int64_t hstart = ph * stride_h - pads[0]; int64_t hend = hstart + kernel_shape[0] * dilation_h; + hend = std::min(hend, height + pads[1]); for (int64_t pw = 0; pw < pooled_width; ++pw) { int64_t wstart = pw * stride_w - pads[1]; int64_t wend = wstart + kernel_shape[1] * dilation_w; + wend = std::min(wend, width + pads[3]); const int64_t pool_index = ph * pooled_width + pw; y_d[pool_index] = 0; int total_elements = 0; @@ -532,12 +535,15 @@ struct AveragePool3DTask { for (int64_t ph = 0; ph < pooled_height; ++ph) { int64_t hstart = ph * stride_h - pads[0]; int64_t hend = hstart + kernel_shape[0] * dilation_h; + hend = std::min(hend, height + pads[1]); for (int64_t pw = 0; pw < pooled_width; ++pw) { int64_t wstart = pw * stride_w - pads[1]; int64_t wend = wstart + kernel_shape[1] * dilation_w; + wend = std::min(wend, width + pads[3]); for (int64_t pd = 0; pd < pooled_depth; ++pd) { int64_t dstart = pd * stride_d - pads[2]; int64_t dend = dstart + kernel_shape[2] * dilation_d; + dend = std::min(dend, depth + pads[5]); const int64_t pool_index = ph * pooled_width * pooled_depth + pw * pooled_depth + pd; y_d[pool_index] = 0; int total_elements = 0; diff --git a/onnxruntime/core/providers/cpu/rnn/deep_cpu_gru.cc b/onnxruntime/core/providers/cpu/rnn/deep_cpu_gru.cc index b78c5236e6fab..c0171f7728ea8 100644 --- a/onnxruntime/core/providers/cpu/rnn/deep_cpu_gru.cc +++ 
b/onnxruntime/core/providers/cpu/rnn/deep_cpu_gru.cc @@ -152,9 +152,18 @@ ONNX_CPU_OPERATOR_VERSIONED_KERNEL( .TypeConstraint("T1", DataTypeImpl::GetTensorType()), DeepCpuGruOp); -ONNX_CPU_OPERATOR_KERNEL( +ONNX_CPU_OPERATOR_VERSIONED_KERNEL( GRU, 14, + 21, + KernelDefBuilder().TypeConstraint("T", {DataTypeImpl::GetTensorType(), + DataTypeImpl::GetTensorType()}) + .TypeConstraint("T1", DataTypeImpl::GetTensorType()), + DeepCpuGruOp); + +ONNX_CPU_OPERATOR_KERNEL( + GRU, + 22, KernelDefBuilder().TypeConstraint("T", {DataTypeImpl::GetTensorType(), DataTypeImpl::GetTensorType()}) .TypeConstraint("T1", DataTypeImpl::GetTensorType()), diff --git a/onnxruntime/core/providers/cpu/rnn/deep_cpu_lstm.cc b/onnxruntime/core/providers/cpu/rnn/deep_cpu_lstm.cc index 09bbf6c4c79e6..e95ad707cf2b0 100644 --- a/onnxruntime/core/providers/cpu/rnn/deep_cpu_lstm.cc +++ b/onnxruntime/core/providers/cpu/rnn/deep_cpu_lstm.cc @@ -163,7 +163,14 @@ ONNX_CPU_OPERATOR_VERSIONED_KERNEL(LSTM, 7, 13, .TypeConstraint("T1", DataTypeImpl::GetTensorType()), DeepCpuLstmOp); -ONNX_CPU_OPERATOR_KERNEL(LSTM, 14, +ONNX_CPU_OPERATOR_VERSIONED_KERNEL(LSTM, 14, 21, + KernelDefBuilder() + .TypeConstraint("T", {DataTypeImpl::GetTensorType(), + DataTypeImpl::GetTensorType()}) + .TypeConstraint("T1", DataTypeImpl::GetTensorType()), + DeepCpuLstmOp); + +ONNX_CPU_OPERATOR_KERNEL(LSTM, 22, KernelDefBuilder() .TypeConstraint("T", {DataTypeImpl::GetTensorType(), DataTypeImpl::GetTensorType()}) diff --git a/onnxruntime/core/providers/cpu/rnn/rnn.cc b/onnxruntime/core/providers/cpu/rnn/rnn.cc index 82a689c830493..f061dfcf4827c 100644 --- a/onnxruntime/core/providers/cpu/rnn/rnn.cc +++ b/onnxruntime/core/providers/cpu/rnn/rnn.cc @@ -24,9 +24,17 @@ ONNX_CPU_OPERATOR_VERSIONED_KERNEL( .TypeConstraint("T1", DataTypeImpl::GetTensorType()), RNN); +ONNX_CPU_OPERATOR_VERSIONED_KERNEL( + RNN, + 14, 21, + KernelDefBuilder() + .TypeConstraint("T", DataTypeImpl::GetTensorType()) + .TypeConstraint("T1", DataTypeImpl::GetTensorType()), + RNN); + ONNX_CPU_OPERATOR_KERNEL( RNN, - 14, + 22, KernelDefBuilder() .TypeConstraint("T", DataTypeImpl::GetTensorType()) .TypeConstraint("T1", DataTypeImpl::GetTensorType()), diff --git a/onnxruntime/core/providers/cpu/tensor/eye_like.cc b/onnxruntime/core/providers/cpu/tensor/eye_like.cc index ce7048fb8e76f..5972638a88fbd 100644 --- a/onnxruntime/core/providers/cpu/tensor/eye_like.cc +++ b/onnxruntime/core/providers/cpu/tensor/eye_like.cc @@ -18,9 +18,23 @@ ORT_SPECIFY_OP_KERNEL_ARG_DEFAULT_TYPES_ALL_OPSETS( using EnabledEyeLikeDataTypes = ORT_OP_KERNEL_ARG_ENABLED_TYPE_LIST_ALL_OPSETS( kCpuExecutionProvider, kOnnxDomain, EyeLike, Output, 0); -ONNX_CPU_OPERATOR_KERNEL( +ONNX_CPU_OPERATOR_VERSIONED_KERNEL( EyeLike, 9, + 21, + KernelDefBuilder() + .TypeConstraint( + "T1", + BuildKernelDefConstraintsFromTypeList()) + .TypeConstraint( + "T2", + BuildKernelDefConstraintsFromTypeList()), + EyeLike); + +// Opset 22 starts to support bfloat16 +ONNX_CPU_OPERATOR_KERNEL( + EyeLike, + 22, KernelDefBuilder() .TypeConstraint( "T1", diff --git a/onnxruntime/core/providers/cpu/tensor/grid_sample.cc b/onnxruntime/core/providers/cpu/tensor/grid_sample.cc index a83ba378d7f1e..d673fcce223e6 100644 --- a/onnxruntime/core/providers/cpu/tensor/grid_sample.cc +++ b/onnxruntime/core/providers/cpu/tensor/grid_sample.cc @@ -11,23 +11,29 @@ namespace onnxruntime { -#define REGISTER_KERNEL_TYPED(T) \ - ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_EX(GridSample, kOnnxDomain, 16, 19, T, kCpuExecutionProvider, \ - KernelDefBuilder() \ - .TypeConstraint("T1", 
DataTypeImpl::GetTensorType()) \ - .TypeConstraint("T2", DataTypeImpl::GetTensorType()), \ - GridSample); - -#define REGISTER_KERNEL_TYPED_20(T) \ - ONNX_OPERATOR_TYPED_KERNEL_EX(GridSample, kOnnxDomain, 20, T, kCpuExecutionProvider, \ - KernelDefBuilder() \ - .TypeConstraint("T1", DataTypeImpl::GetTensorType()) \ - .TypeConstraint("T2", DataTypeImpl::GetTensorType()), \ - GridSample); - -REGISTER_KERNEL_TYPED(float) -REGISTER_KERNEL_TYPED_20(float) -REGISTER_KERNEL_TYPED_20(double) +#define REGISTER_VERSIONED_KERNEL_TYPED(START_VER, END_VER, T) \ + ONNX_CPU_OPERATOR_VERSIONED_TYPED_KERNEL(GridSample, START_VER, END_VER, T, \ + KernelDefBuilder() \ + .TypeConstraint("T1", DataTypeImpl::GetTensorType()) \ + .TypeConstraint("T2", DataTypeImpl::GetTensorType()), \ + GridSample); + +#define REGISTER_KERNEL_TYPED(VER, T) \ + ONNX_CPU_OPERATOR_TYPED_KERNEL(GridSample, VER, T, \ + KernelDefBuilder() \ + .TypeConstraint("T1", DataTypeImpl::GetTensorType()) \ + .TypeConstraint("T2", DataTypeImpl::GetTensorType()), \ + GridSample); + +REGISTER_VERSIONED_KERNEL_TYPED(16, 19, float) +REGISTER_VERSIONED_KERNEL_TYPED(16, 19, double) + +REGISTER_VERSIONED_KERNEL_TYPED(20, 21, float) +REGISTER_VERSIONED_KERNEL_TYPED(20, 21, double) + +// Opset 22 supports BFloat16 +REGISTER_KERNEL_TYPED(22, float) +REGISTER_KERNEL_TYPED(22, double) // Restore normalized location to actual image location // When align_corners is true: diff --git a/onnxruntime/core/providers/cpu/tensor/space_depth_ops.cc b/onnxruntime/core/providers/cpu/tensor/space_depth_ops.cc index a23f80671c9ac..7e1049c402210 100644 --- a/onnxruntime/core/providers/cpu/tensor/space_depth_ops.cc +++ b/onnxruntime/core/providers/cpu/tensor/space_depth_ops.cc @@ -43,7 +43,8 @@ ONNX_CPU_OPERATOR_VERSIONED_KERNEL( 12, KernelDefBuilder() .TypeConstraint("T", {DataTypeImpl::GetTensorType(), - DataTypeImpl::GetTensorType()}), + DataTypeImpl::GetTensorType(), + DataTypeImpl::GetTensorType()}), DepthToSpace); ONNX_CPU_OPERATOR_KERNEL( @@ -51,7 +52,8 @@ ONNX_CPU_OPERATOR_KERNEL( 13, KernelDefBuilder() .TypeConstraint("T", {DataTypeImpl::GetTensorType(), - DataTypeImpl::GetTensorType()}), + DataTypeImpl::GetTensorType(), + DataTypeImpl::GetTensorType()}), DepthToSpace); // intermediate tensor shapes are: @@ -196,6 +198,19 @@ Status DepthToSpace::Compute(OpKernelContext* context) const { onnxruntime::narrow(blocksize_), onnxruntime::narrow(input_width), onnxruntime::narrow(blocksize_)); + } else if (input.IsDataType()) { + SpaceDepthOpCpuImpl(input, output, permutation, + onnxruntime::narrow(batch), + onnxruntime::narrow(dim1), + onnxruntime::narrow(blocksize_), + onnxruntime::narrow(dim3), + onnxruntime::narrow(input_height), + onnxruntime::narrow(input_width), + onnxruntime::narrow(input_depth / blocksize_ / blocksize_), + onnxruntime::narrow(input_height), + onnxruntime::narrow(blocksize_), + onnxruntime::narrow(input_width), + onnxruntime::narrow(blocksize_)); } else { // user will not see this as the kernel doesn't claim support for types other than float and double return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Unsupported input type in DepthToSpace op: ", input.DataType()); diff --git a/onnxruntime/core/providers/cuda/cuda_execution_provider.cc b/onnxruntime/core/providers/cuda/cuda_execution_provider.cc index d4013a7dc3d57..4a10de153653c 100644 --- a/onnxruntime/core/providers/cuda/cuda_execution_provider.cc +++ b/onnxruntime/core/providers/cuda/cuda_execution_provider.cc @@ -1218,29 +1218,45 @@ class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCudaExecutionProvider, 
kOnnxDomain, 14, C class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 14, float, Relu); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 14, double, Relu); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 14, MLFloat16, Relu); +class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 14, int8_t, Add); +class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 14, int16_t, Add); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 14, int32_t, Add); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 14, int64_t, Add); +class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 14, uint8_t, Add); +class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 14, uint16_t, Add); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 14, uint32_t, Add); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 14, uint64_t, Add); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 14, float, Add); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 14, double, Add); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 14, MLFloat16, Add); +class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 14, int8_t, Sub); +class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 14, int16_t, Sub); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 14, int32_t, Sub); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 14, int64_t, Sub); +class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 14, uint8_t, Sub); +class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 14, uint16_t, Sub); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 14, uint32_t, Sub); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 14, uint64_t, Sub); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 14, float, Sub); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 14, double, Sub); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 14, MLFloat16, Sub); +class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 14, int8_t, Mul); +class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 14, int16_t, Mul); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 14, int32_t, Mul); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 14, int64_t, Mul); +class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 14, uint8_t, Mul); +class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 14, uint16_t, Mul); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 14, uint32_t, Mul); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 14, uint64_t, Mul); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 14, float, Mul); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 14, double, Mul); class 
ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 14, MLFloat16, Mul); +class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 14, int8_t, Div); +class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 14, int16_t, Div); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 14, int32_t, Div); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 14, int64_t, Div); +class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 14, uint8_t, Div); +class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 14, uint16_t, Div); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 14, uint32_t, Div); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 14, uint64_t, Div); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 14, float, Div); @@ -2183,29 +2199,45 @@ static Status RegisterCudaKernels(KernelRegistry& kernel_registry) { BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, diff --git a/onnxruntime/core/providers/cuda/cuda_profiler.h b/onnxruntime/core/providers/cuda/cuda_profiler.h index 4930e55351615..1d8ecddce4c79 100644 --- a/onnxruntime/core/providers/cuda/cuda_profiler.h +++ b/onnxruntime/core/providers/cuda/cuda_profiler.h @@ -33,9 +33,9 @@ class CudaProfiler final : public EpProfiler { ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(CudaProfiler); ~CudaProfiler() {} bool StartProfiling(TimePoint) override { return true; } - void EndProfiling(TimePoint, Events&) override {}; - void Start(uint64_t) override{}; - void Stop(uint64_t) override{}; + void EndProfiling(TimePoint, Events&) override {} + void Start(uint64_t) override {} + void Stop(uint64_t) override {} }; #endif diff --git a/onnxruntime/core/providers/cuda/math/binary_elementwise_ops.cc b/onnxruntime/core/providers/cuda/math/binary_elementwise_ops.cc index 8aca8635a24fe..33b41453dafcb 100644 --- a/onnxruntime/core/providers/cuda/math/binary_elementwise_ops.cc +++ b/onnxruntime/core/providers/cuda/math/binary_elementwise_ops.cc @@ -200,11 +200,15 @@ Status BinaryElementwise::Prepare(OpKernelContext* context, Bin BINARY_OP_TYPED(name, ver, double) \ BINARY_OP_TYPED(name, ver, BFloat16) -#define BINARY_OP_UZILHFD(name, ver) \ - BINARY_OP_TYPED(name, ver, uint32_t) \ - BINARY_OP_TYPED(name, ver, uint64_t) \ - BINARY_OP_TYPED(name, ver, int32_t) \ - BINARY_OP_TYPED(name, ver, int64_t) \ 
+#define BINARY_OP_BWUZCSILHFD(name, ver) \ + BINARY_OP_TYPED(name, ver, uint8_t) \ + BINARY_OP_TYPED(name, ver, uint16_t) \ + BINARY_OP_TYPED(name, ver, uint32_t) \ + BINARY_OP_TYPED(name, ver, uint64_t) \ + BINARY_OP_TYPED(name, ver, int8_t) \ + BINARY_OP_TYPED(name, ver, int16_t) \ + BINARY_OP_TYPED(name, ver, int32_t) \ + BINARY_OP_TYPED(name, ver, int64_t) \ BINARY_OP_HFD(name, ver) #define BINARY_OP_REGISTER_VERSIONED_OIL(name, startver, endver) \ @@ -279,10 +283,10 @@ BINARY_OP_VERSIONED_UZILHFD_WITH_BF16(Sub, 13, 13) BINARY_OP_VERSIONED_UZILHFD_WITH_BF16(Mul, 13, 13) BINARY_OP_VERSIONED_UZILHFD_WITH_BF16(Div, 13, 13) -BINARY_OP_UZILHFD(Add, 14) -BINARY_OP_UZILHFD(Sub, 14) -BINARY_OP_UZILHFD(Mul, 14) -BINARY_OP_UZILHFD(Div, 14) +BINARY_OP_BWUZCSILHFD(Add, 14) +BINARY_OP_BWUZCSILHFD(Sub, 14) +BINARY_OP_BWUZCSILHFD(Mul, 14) +BINARY_OP_BWUZCSILHFD(Div, 14) BINARY_OP_REGISTER_VERSIONED_CLASS_HFD(Pow, Pow_7, 7, 11) BINARY_LOGICALOP_TYPED(And, 7, bool) diff --git a/onnxruntime/core/providers/cuda/math/binary_elementwise_ops_impl.cu b/onnxruntime/core/providers/cuda/math/binary_elementwise_ops_impl.cu index 0a7753783ffb1..1cc407efe8670 100644 --- a/onnxruntime/core/providers/cuda/math/binary_elementwise_ops_impl.cu +++ b/onnxruntime/core/providers/cuda/math/binary_elementwise_ops_impl.cu @@ -89,6 +89,20 @@ namespace cuda { SPECIALIZED_BINARY_ELEMENTWISE_IMPL(x, double) \ SPECIALIZED_BINARY_ELEMENTWISE_IMPL(x, BFloat16) +#define SPECIALIZED_BINARY_ELEMENTWISE_IMPL_BWUZCSILHFD(x) \ + SPECIALIZED_BINARY_ELEMENTWISE_IMPL(x, uint8_t) \ + SPECIALIZED_BINARY_ELEMENTWISE_IMPL(x, uint16_t) \ + SPECIALIZED_BINARY_ELEMENTWISE_IMPL(x, uint32_t) \ + SPECIALIZED_BINARY_ELEMENTWISE_IMPL(x, uint64_t) \ + SPECIALIZED_BINARY_ELEMENTWISE_IMPL(x, int8_t) \ + SPECIALIZED_BINARY_ELEMENTWISE_IMPL(x, int16_t) \ + SPECIALIZED_BINARY_ELEMENTWISE_IMPL(x, int32_t) \ + SPECIALIZED_BINARY_ELEMENTWISE_IMPL(x, int64_t) \ + SPECIALIZED_BINARY_ELEMENTWISE_IMPL(x, half) \ + SPECIALIZED_BINARY_ELEMENTWISE_IMPL(x, float) \ + SPECIALIZED_BINARY_ELEMENTWISE_IMPL(x, double) \ + SPECIALIZED_BINARY_ELEMENTWISE_IMPL(x, BFloat16) + #define SPECIALIZED_BINARY_ELEMENTWISE_IMPL_UZIL(x) \ SPECIALIZED_BINARY_ELEMENTWISE_IMPL(x, uint32_t) \ SPECIALIZED_BINARY_ELEMENTWISE_IMPL(x, uint64_t) \ @@ -135,11 +149,11 @@ BINARY_OPS() // D: double // O: bool -SPECIALIZED_BINARY_ELEMENTWISE_IMPL_UZILHFD(Add) +SPECIALIZED_BINARY_ELEMENTWISE_IMPL_BWUZCSILHFD(Add) SPECIALIZED_BINARY_ELEMENTWISE_IMPL(Add, bool) -SPECIALIZED_BINARY_ELEMENTWISE_IMPL_UZILHFD(Sub) -SPECIALIZED_BINARY_ELEMENTWISE_IMPL_UZILHFD(Mul) -SPECIALIZED_BINARY_ELEMENTWISE_IMPL_UZILHFD(Div) +SPECIALIZED_BINARY_ELEMENTWISE_IMPL_BWUZCSILHFD(Sub) +SPECIALIZED_BINARY_ELEMENTWISE_IMPL_BWUZCSILHFD(Mul) +SPECIALIZED_BINARY_ELEMENTWISE_IMPL_BWUZCSILHFD(Div) SPECIALIZED_BINARY_ELEMENTWISE_IMPL_HFD(Pow_7) SPECIALIZED_BINARY_ELEMENTWISE_IMPL(And, bool) SPECIALIZED_BINARY_ELEMENTWISE_IMPL(Or, bool) diff --git a/onnxruntime/core/providers/cuda/nn/layer_norm.cc b/onnxruntime/core/providers/cuda/nn/layer_norm.cc index 7dd10f9c2960c..d479261855e2d 100644 --- a/onnxruntime/core/providers/cuda/nn/layer_norm.cc +++ b/onnxruntime/core/providers/cuda/nn/layer_norm.cc @@ -4,6 +4,7 @@ #include "core/providers/shared_library/provider_api.h" #include "core/providers/cuda/nn/layer_norm.h" #include "core/providers/cuda/nn/layer_norm_impl.h" +#include "core/providers/cpu/nn/layer_norm_helper.h" #include "core/providers/cuda/cuda_common.h" namespace onnxruntime { @@ -44,20 +45,14 @@ Status 
LayerNorm::ComputeInternal(OpKernelContext* ctx) con auto bias_data = (simplified || (nullptr == bias)) ? nullptr : reinterpret_cast(bias->Data()); const TensorShape& x_shape = X->Shape(); - const int64_t axis = HandleNegativeAxis(axis_, x_shape.NumDimensions()); - - int n1 = gsl::narrow(x_shape.SizeToDimension(axis)); - int n2 = gsl::narrow(x_shape.SizeFromDimension(axis)); - - const auto scale_size = scale->Shape().Size(); - const auto bias_size = (bias_data) ? bias->Shape().Size() : 0; - if (n2 == 1 || scale_size != n2 || (bias_data && bias_size != n2)) { - return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, - "Size of X.shape()[axis:] == ", n2, - ". Size of scale and bias (if provided) must match this " - "and the size must not be 1. Got scale size of ", - scale_size, " and bias size of ", bias_size); - } + auto x_num_dims = x_shape.NumDimensions(); + const int64_t axis = HandleNegativeAxis(axis_, x_num_dims); + + const TensorShape& scale_shape = scale->Shape(); + const TensorShape& bias_shape = bias_data ? bias->Shape() : TensorShape(); + + LayerNormParams params; + ORT_RETURN_IF_ERROR(LayerNormHelper::CheckInputs(x_shape, scale_shape, bias_shape, bias_data != nullptr, axis, params)); // Outputs Tensor* Y = ctx->Output(0, x_shape); @@ -65,7 +60,7 @@ Status LayerNorm::ComputeInternal(OpKernelContext* ctx) con // Mean and variance std::vector mean_inv_std_var_dim; - for (int i = 0; i < static_cast(x_shape.NumDimensions()); ++i) { + for (int i = 0; i < static_cast(x_num_dims); ++i) { if (i < axis) { mean_inv_std_var_dim.emplace_back(x_shape.GetDims()[i]); } else { @@ -93,8 +88,11 @@ Status LayerNorm::ComputeInternal(OpKernelContext* ctx) con return Status::OK(); } - HostApplyLayerNorm(GetDeviceProp(), Stream(ctx), Y_data, mean_data, inv_var_data, - X_data, n1, n2, epsilon_, scale_data, bias_data); + HostApplyLayerNorm( + GetDeviceProp(), Stream(ctx), Y_data, mean_data, inv_var_data, X_data, + onnxruntime::narrow(params.num_rows), onnxruntime::narrow(params.norm_size), epsilon_, + scale_data, bias_data, + onnxruntime::narrow(params.broadcast_param)); CUDA_RETURN_IF_ERROR(cudaGetLastError()); return Status::OK(); } diff --git a/onnxruntime/core/providers/cuda/nn/layer_norm_impl.cu b/onnxruntime/core/providers/cuda/nn/layer_norm_impl.cu index b9e8b45307079..90b542beaaf26 100644 --- a/onnxruntime/core/providers/cuda/nn/layer_norm_impl.cu +++ b/onnxruntime/core/providers/cuda/nn/layer_norm_impl.cu @@ -23,8 +23,8 @@ /* Modifications Copyright (c) Microsoft. */ #include "core/providers/cuda/cu_inc/common.cuh" - #include "layer_norm_impl.h" +#include "core/providers/cpu/nn/layer_norm_helper.h" namespace onnxruntime { namespace cuda { @@ -334,6 +334,7 @@ __global__ void cuApplyLayerNorm( const U epsilon, const V* __restrict__ gamma, const V* __restrict__ beta, + int broadcast_param, const T* __restrict__ skip, const T* __restrict__ bias, T* __restrict__ skip_input_bias_add_output) { @@ -353,6 +354,10 @@ __global__ void cuApplyLayerNorm( V* ovals = output_vals + offset; T* skip_input_bias_add_ovals = (skip_input_bias_add_output != nullptr) ? skip_input_bias_add_output + offset : nullptr; U c_inv_std_dev = rsqrt(sigma2 + epsilon); + + // Compute the offset of gamma and beta to support broadcasting. 
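// ---------------------------------------------------------------------------
// Sketch of the mean/inv_std_dev output shape built by the mean_inv_std_var_dim
// loop in ComputeInternal above (shapes are illustrative): dimensions before
// `axis` are kept and the remaining dimensions collapse to 1.
#include <cstdint>
#include <vector>

inline std::vector<int64_t> MeanShape(const std::vector<int64_t>& x_dims, int64_t axis) {
  std::vector<int64_t> dims;
  for (size_t i = 0; i < x_dims.size(); ++i) {
    dims.push_back(static_cast<int64_t>(i) < axis ? x_dims[i] : 1);
  }
  return dims;
}
// MeanShape({2, 3, 4}, 2) == {2, 3, 1}: one mean and one inv_std_dev per
// normalized row, matching params.num_rows rows of params.norm_size elements.
// ---------------------------------------------------------------------------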
+ int gamma_beta_offset = LAYER_NORM_SCALE_BIAS_OFFSET(broadcast_param, i1, n2); + const int numx = blockDim.x * blockDim.y; const int thrx = threadIdx.x + threadIdx.y * blockDim.x; for (int i = thrx; i < n2; i += numx) { @@ -366,8 +371,10 @@ __global__ void cuApplyLayerNorm( curr += static_cast(skip_vals[i]); } - U gamma_i = (gamma != nullptr) ? (U)gamma[i] : (U)1; - U beta_i = (beta != nullptr) ? (U)beta[i] : (U)0; + int index = gamma_beta_offset + i; + U gamma_i = (gamma != nullptr) ? (U)gamma[index] : (U)1; + U beta_i = (beta != nullptr) ? (U)beta[index] : (U)0; + if (simplified) { ovals[i] = static_cast(gamma_i * c_inv_std_dev * curr); } else { @@ -409,6 +416,7 @@ void HostApplyLayerNorm( double epsilon, const V* gamma, const V* beta, + int broadcast_param, const T* skip, const T* bias, T* skip_input_bias_add_output) { @@ -442,15 +450,16 @@ void HostApplyLayerNorm( input, n1, n2, U(epsilon), - gamma, beta, + gamma, beta, broadcast_param, skip, bias, skip_input_bias_add_output); } -#define LAYERNORM_LINEAR_IMPL(T, U, V, simplified) \ - template void HostApplyLayerNorm(const cudaDeviceProp& prop, cudaStream_t stream, V* output, \ - U* mean, U* inv_std_dev, const T* input, int n1, int n2, \ - double epsilon, const V* gamma, const V* beta, const T* skip, \ - const T* bias, T* skip_input_bias_add_output); +#define LAYERNORM_LINEAR_IMPL(T, U, V, simplified) \ + template void HostApplyLayerNorm(const cudaDeviceProp& prop, cudaStream_t stream, V* output, \ + U* mean, U* inv_std_dev, const T* input, int n1, int n2, \ + double epsilon, const V* gamma, const V* beta, \ + int broadcast_param, \ + const T* skip, const T* bias, T* skip_input_bias_add_output); LAYERNORM_LINEAR_IMPL(float, float, float, true) LAYERNORM_LINEAR_IMPL(half, float, half, true) diff --git a/onnxruntime/core/providers/cuda/nn/layer_norm_impl.h b/onnxruntime/core/providers/cuda/nn/layer_norm_impl.h index e3952eefae35d..4e74aa9ab67b4 100644 --- a/onnxruntime/core/providers/cuda/nn/layer_norm_impl.h +++ b/onnxruntime/core/providers/cuda/nn/layer_norm_impl.h @@ -41,6 +41,7 @@ void HostApplyLayerNorm( double epsilon, const V* gamma, const V* beta, + int broadcast_param = 0, // parameter for broadcasting gamma/beta. const T* skip = nullptr, const T* bias = nullptr, T* skip_input_bias_add_output = nullptr); diff --git a/onnxruntime/core/providers/cuda/nn/pool.cc b/onnxruntime/core/providers/cuda/nn/pool.cc index 4acdcfcf35491..25c8210d4aba3 100644 --- a/onnxruntime/core/providers/cuda/nn/pool.cc +++ b/onnxruntime/core/providers/cuda/nn/pool.cc @@ -287,13 +287,14 @@ Status Pool, Layout>::ComputeInternal(OpKernelContext* context) co } Tensor* I = context->Output(1, TensorShape(i_dims)); + constexpr bool pool_template_arg = Layout == LAYOUT_NHWC; if (nullptr != I || !this->pool_attrs_.default_dilations) { auto i_data = nullptr == I ? 
nullptr : I->MutableData(); - MaxPoolWithIndex(this->Stream(context), x_shape, TensorShape(y_dims), kernel_shape, - strides, pads, this->pool_attrs_.dilations, - this->pool_attrs_.storage_order, x_data, y_data, i_data); + MaxPoolWithIndex(this->Stream(context), x_shape, TensorShape(y_dims), kernel_shape, + strides, pads, this->pool_attrs_.dilations, + this->pool_attrs_.storage_order, x_data, y_data, i_data); } else { - ORT_RETURN_IF_ERROR((Pool, Layout == LAYOUT_NHWC>::ComputeInternal(context))); + ORT_RETURN_IF_ERROR((Pool, pool_template_arg>::ComputeInternal(context))); } return Status::OK(); } diff --git a/onnxruntime/core/providers/cuda/tensor/compress_impl.cu b/onnxruntime/core/providers/cuda/tensor/compress_impl.cu index b2c7b60866a77..0c04e027ca1b9 100644 --- a/onnxruntime/core/providers/cuda/tensor/compress_impl.cu +++ b/onnxruntime/core/providers/cuda/tensor/compress_impl.cu @@ -13,7 +13,6 @@ #include "core/providers/cuda/tensor/compress_impl.h" -#include #include namespace onnxruntime { @@ -23,7 +22,7 @@ namespace cuda { // in InclusiveSum(). By default, the accumulator type matches the input, but for int8_t // the sum overflows quickly, so we want the source type to match the output (int32_t). // see https://github.com/NVIDIA/cub/issues/384 -struct CastToInt32 : public thrust::unary_function { +struct CastToInt32 { __host__ __device__ int32_t operator()(int8_t v) const { return static_cast(v); } diff --git a/onnxruntime/core/providers/cuda/tensor/space_depth_ops.cc b/onnxruntime/core/providers/cuda/tensor/space_depth_ops.cc index aaaf3600b676e..cbfc62494fde4 100644 --- a/onnxruntime/core/providers/cuda/tensor/space_depth_ops.cc +++ b/onnxruntime/core/providers/cuda/tensor/space_depth_ops.cc @@ -171,13 +171,13 @@ Status SpaceToDepth::ComputeInternal(OpKernelContext* context) const { int64_t output_depth = -1; int64_t output_height = -1; int64_t output_width = -1; - + constexpr bool template_arg = Layout == LAYOUT_NHWC; ORT_RETURN_IF_ERROR( - InputValidationsAndOutputDimsCalc(input, - batch, - input_depth, input_height, input_width, - output_depth, output_height, output_width, - true)); + InputValidationsAndOutputDimsCalc(input, + batch, + input_depth, input_height, input_width, + output_depth, output_height, output_width, + true)); // We use the "actual" output shape to construct the output tensor Tensor& output = (Layout == LAYOUT_NCHW) @@ -223,13 +223,13 @@ Status DepthToSpace::ComputeInternal(OpKernelContext* context) const { int64_t output_depth = -1; int64_t output_height = -1; int64_t output_width = -1; - + constexpr bool template_arg = Layout == LAYOUT_NHWC; ORT_RETURN_IF_ERROR( - InputValidationsAndOutputDimsCalc(input, - batch, - input_depth, input_height, input_width, - output_depth, output_height, output_width, - false)); + InputValidationsAndOutputDimsCalc(input, + batch, + input_depth, input_height, input_width, + output_depth, output_height, output_width, + false)); // We use the "actual" output shape to construct the output tensor Tensor& output = (Layout == LAYOUT_NCHW) diff --git a/onnxruntime/core/providers/migraphx/gpu_data_transfer.cc b/onnxruntime/core/providers/migraphx/gpu_data_transfer.cc index 77c5e18a5878e..a643f6b208f94 100644 --- a/onnxruntime/core/providers/migraphx/gpu_data_transfer.cc +++ b/onnxruntime/core/providers/migraphx/gpu_data_transfer.cc @@ -67,6 +67,9 @@ common::Status GPUDataTransfer::CopyTensorAsync(const Tensor& src, Tensor& dst, } else if (src_device.Type() == OrtDevice::GPU) { // copying between GPU, this is non-blocking 
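// Background for the fallback added below: hipMemcpyAsync is only truly
// asynchronous when the host buffer is pinned; with pageable CPU memory the
// runtime must stage the transfer. hipMemcpyWithStream enqueues the copy on
// the given stream but blocks the calling host thread until it completes,
// which makes it the safe choice for copying from ordinary (non-pinned)
// CPU memory to the GPU.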
HIP_CALL_THROW(hipMemcpyAsync(dst_data, src_data, bytes, hipMemcpyDeviceToDevice, static_cast(stream.GetHandle()))); + } else { + // copy from other CPU memory to GPU, this is blocking + HIP_CALL_THROW(hipMemcpyWithStream(dst_data, src_data, bytes, hipMemcpyHostToDevice, static_cast(stream.GetHandle()))); } } else if (src_device.Type() == OrtDevice::GPU) { // If dest are not pinned, the memory copy will be performed synchronously. diff --git a/onnxruntime/core/providers/migraphx/migraphx_execution_provider.cc b/onnxruntime/core/providers/migraphx/migraphx_execution_provider.cc index 9017b36a0f087..95fbe7ab58ce2 100644 --- a/onnxruntime/core/providers/migraphx/migraphx_execution_provider.cc +++ b/onnxruntime/core/providers/migraphx/migraphx_execution_provider.cc @@ -821,6 +821,7 @@ GetUnsupportedNodeIndices(const GraphViewer& graph_viewer, "DequantizeLinear", "Div", "Dropout", + "Einsum", "Elu", "Equal", "Erf", diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/LRN_op_builder.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/LRN_op_builder.cc index 91cad034d8854..fd1720d69eebd 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/LRN_op_builder.cc +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/LRN_op_builder.cc @@ -1,7 +1,7 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. -#include +#include "core/graph/onnx_protobuf.h" #include "core/common/logging/logging.h" #include "core/common/safeint.h" diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/batchnorm_op_builder.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/batchnorm_op_builder.cc index 75a66d3a14643..5874eb1e7dc3b 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/batchnorm_op_builder.cc +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/batchnorm_op_builder.cc @@ -1,7 +1,7 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. -#include +#include "core/graph/onnx_protobuf.h" #include "core/common/logging/logging.h" #include "core/common/safeint.h" diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/binary_op_builder.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/binary_op_builder.cc index 5599fbdc69bdd..91d1a38e71e6f 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/binary_op_builder.cc +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/binary_op_builder.cc @@ -1,7 +1,7 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. -#include +#include "core/graph/onnx_protobuf.h" #include "core/common/logging/logging.h" #include "core/common/safeint.h" diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/cast_op_builder.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/cast_op_builder.cc index 9059de817e210..03329b9159c06 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/cast_op_builder.cc +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/cast_op_builder.cc @@ -1,7 +1,7 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. 
-#include +#include "core/graph/onnx_protobuf.h" #include "core/common/logging/logging.h" #include "core/common/safeint.h" diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/clip_op_builder.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/clip_op_builder.cc index 9821d9267c71f..becd677e32ac1 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/clip_op_builder.cc +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/clip_op_builder.cc @@ -1,7 +1,7 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. -#include +#include "core/graph/onnx_protobuf.h" #include "core/common/logging/logging.h" #include "core/common/safeint.h" diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/concat_op_builder.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/concat_op_builder.cc index a8394faec51be..fa5e292be0ecd 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/concat_op_builder.cc +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/concat_op_builder.cc @@ -1,7 +1,7 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. -#include +#include "core/graph/onnx_protobuf.h" #include "core/common/logging/logging.h" #include "core/common/safeint.h" diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/conv_op_builder.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/conv_op_builder.cc index 5477cd16f9c01..a7a837ae210b4 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/conv_op_builder.cc +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/conv_op_builder.cc @@ -1,7 +1,7 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. -#include +#include "core/graph/onnx_protobuf.h" #include "core/common/logging/logging.h" #include "core/common/safeint.h" diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/depthtospace_op_builder.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/depthtospace_op_builder.cc index ef8709641e2d0..039d8510bb8d2 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/depthtospace_op_builder.cc +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/depthtospace_op_builder.cc @@ -1,7 +1,7 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. -#include +#include "core/graph/onnx_protobuf.h" #include "core/common/logging/logging.h" #include "core/common/safeint.h" diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/dequantizelinear_op_builder.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/dequantizelinear_op_builder.cc index 7d0e04fbd7b0e..ed9062f894660 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/dequantizelinear_op_builder.cc +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/dequantizelinear_op_builder.cc @@ -1,7 +1,7 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. 
-#include +#include "core/graph/onnx_protobuf.h" #include "core/common/logging/logging.h" #include "core/common/safeint.h" diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/elu_op_builder.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/elu_op_builder.cc index 218c41d6f07c0..fc2348951edb7 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/elu_op_builder.cc +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/elu_op_builder.cc @@ -1,7 +1,7 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. -#include +#include "core/graph/onnx_protobuf.h" #include "core/common/logging/logging.h" #include "core/common/safeint.h" diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/flatten_op_builder.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/flatten_op_builder.cc index b5e9c011990ce..986ce78fb1acc 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/flatten_op_builder.cc +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/flatten_op_builder.cc @@ -1,7 +1,7 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. -#include +#include "core/graph/onnx_protobuf.h" #include "core/common/logging/logging.h" #include "core/common/safeint.h" diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/gather_op_builder.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/gather_op_builder.cc index d6da9181b5a3d..ccd3f8b571fcb 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/gather_op_builder.cc +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/gather_op_builder.cc @@ -1,7 +1,7 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. -#include +#include "core/graph/onnx_protobuf.h" #include "core/common/logging/logging.h" #include "core/common/safeint.h" diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/gemm_op_builder.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/gemm_op_builder.cc index 66eefcd6e4840..cff96c2f1ff99 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/gemm_op_builder.cc +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/gemm_op_builder.cc @@ -1,7 +1,7 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. -#include +#include "core/graph/onnx_protobuf.h" #include "core/common/logging/logging.h" #include "core/common/safeint.h" diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/identity_op_builder.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/identity_op_builder.cc index d7b35572e6cd1..250b190091a52 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/identity_op_builder.cc +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/identity_op_builder.cc @@ -1,7 +1,7 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. 
-#include +#include "core/graph/onnx_protobuf.h" #include "core/common/logging/logging.h" #include "core/common/safeint.h" diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/leakyrelu_op_builder.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/leakyrelu_op_builder.cc index 6a633c443c9e5..e3dcee1e3d597 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/leakyrelu_op_builder.cc +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/leakyrelu_op_builder.cc @@ -1,7 +1,7 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. -#include +#include "core/graph/onnx_protobuf.h" #include "core/common/logging/logging.h" #include "core/framework/tensorprotoutils.h" diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/minmax_op_builder.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/minmax_op_builder.cc index aeadbd17053cf..a80742aef9cb6 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/minmax_op_builder.cc +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/minmax_op_builder.cc @@ -1,7 +1,7 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. -#include +#include "core/graph/onnx_protobuf.h" #include "core/common/logging/logging.h" #include "core/common/safeint.h" diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/pad_op_builder.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/pad_op_builder.cc index b0404ebec0583..8127de0a0f05f 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/pad_op_builder.cc +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/pad_op_builder.cc @@ -1,7 +1,7 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. -#include +#include "core/graph/onnx_protobuf.h" #include "core/common/logging/logging.h" #include "core/common/safeint.h" diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/pool_op_builder.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/pool_op_builder.cc index a2a4786b72ec7..10c5efb84ed8f 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/pool_op_builder.cc +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/pool_op_builder.cc @@ -1,7 +1,7 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. -#include +#include "core/graph/onnx_protobuf.h" #include "core/common/logging/logging.h" #include "core/common/safeint.h" diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/quantizelinear_op_builder.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/quantizelinear_op_builder.cc index d13b81c2a14b8..eb81f5e3f59ee 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/quantizelinear_op_builder.cc +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/quantizelinear_op_builder.cc @@ -1,7 +1,7 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. 
-#include +#include "core/graph/onnx_protobuf.h" #include "core/common/logging/logging.h" #include "core/common/safeint.h" diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/reduction_op_builder.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/reduction_op_builder.cc index a6da290753b74..fbb353f949f48 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/reduction_op_builder.cc +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/reduction_op_builder.cc @@ -1,7 +1,7 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. -#include +#include "core/graph/onnx_protobuf.h" #include "core/common/logging/logging.h" #include "core/common/safeint.h" diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/relu_op_builder.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/relu_op_builder.cc index c8641093ee7eb..d65c069851c1f 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/relu_op_builder.cc +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/relu_op_builder.cc @@ -1,7 +1,7 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. -#include +#include "core/graph/onnx_protobuf.h" #include "core/common/logging/logging.h" #include "core/common/safeint.h" diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/reshape_op_builder.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/reshape_op_builder.cc index f2f9165d2f3cc..fad5d8289c6b0 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/reshape_op_builder.cc +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/reshape_op_builder.cc @@ -1,7 +1,7 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. -#include +#include "core/graph/onnx_protobuf.h" #include "core/common/logging/logging.h" #include "core/common/safeint.h" diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/resize_op_builder.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/resize_op_builder.cc index 44403010c936c..af5aeba6c8236 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/resize_op_builder.cc +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/resize_op_builder.cc @@ -1,7 +1,7 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. -#include +#include "core/graph/onnx_protobuf.h" #include "core/common/logging/logging.h" #include "core/common/safeint.h" diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/slice_op_builder.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/slice_op_builder.cc index facdc7132dc00..52b075b0271ef 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/slice_op_builder.cc +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/slice_op_builder.cc @@ -1,7 +1,7 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. 
-#include +#include "core/graph/onnx_protobuf.h" #include "core/common/logging/logging.h" #include "core/common/safeint.h" diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/softmax_op_builder.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/softmax_op_builder.cc index a2a8b4512b028..8fa915de95a72 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/softmax_op_builder.cc +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/softmax_op_builder.cc @@ -1,7 +1,7 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. -#include +#include "core/graph/onnx_protobuf.h" #include "core/common/logging/logging.h" #include "core/common/safeint.h" diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/split_op_builder.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/split_op_builder.cc index edee298ad1ccf..7509fd15f1c5e 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/split_op_builder.cc +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/split_op_builder.cc @@ -1,7 +1,7 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. -#include +#include "core/graph/onnx_protobuf.h" #include #include "core/common/logging/logging.h" diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/squeeze_op_builder.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/squeeze_op_builder.cc index fb3ca5e6175fa..44510c33c004c 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/squeeze_op_builder.cc +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/squeeze_op_builder.cc @@ -1,7 +1,7 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. -#include +#include "core/graph/onnx_protobuf.h" #include "core/common/logging/logging.h" #include "core/common/safeint.h" diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/transpose_op_builder.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/transpose_op_builder.cc index 6fe5ca32fe044..4a9e3eb00a787 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/transpose_op_builder.cc +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/transpose_op_builder.cc @@ -1,7 +1,7 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. -#include +#include "core/graph/onnx_protobuf.h" #include "core/common/logging/logging.h" #include "core/common/safeint.h" diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/unary_op_builder.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/unary_op_builder.cc index dbd960ee5536c..77df9d2fd771c 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/unary_op_builder.cc +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/unary_op_builder.cc @@ -1,7 +1,7 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. 
-#include +#include "core/graph/onnx_protobuf.h" #include "core/common/logging/logging.h" #include "core/common/safeint.h" diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/unsqueeze_op_builder.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/unsqueeze_op_builder.cc index 95cd813800c9a..b9ebbace8d391 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/unsqueeze_op_builder.cc +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/unsqueeze_op_builder.cc @@ -1,7 +1,7 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. -#include +#include "core/graph/onnx_protobuf.h" #include "core/common/logging/logging.h" #include "core/common/safeint.h" diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.h b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.h index 4db335afa98b0..3cbf7d1ee40e8 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.h +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.h @@ -2,7 +2,7 @@ // Licensed under the MIT License. #pragma once -#include +#include "core/graph/onnx_protobuf.h" #include #include "core/common/inlined_containers_fwd.h" diff --git a/onnxruntime/core/providers/qnn/builder/onnx_ctx_model_helper.cc b/onnxruntime/core/providers/qnn/builder/onnx_ctx_model_helper.cc index 79674fd706151..3df231e53e7c0 100644 --- a/onnxruntime/core/providers/qnn/builder/onnx_ctx_model_helper.cc +++ b/onnxruntime/core/providers/qnn/builder/onnx_ctx_model_helper.cc @@ -2,13 +2,15 @@ // Licensed under the MIT License. #include "core/providers/qnn/builder/onnx_ctx_model_helper.h" -#include "core/graph/constants.h" -#include "core/providers/qnn/builder/qnn_model.h" #include #include #include +#include "core/providers/qnn/ort_api.h" +#include "core/providers/qnn/builder/qnn_utils.h" +#include "core/providers/qnn/builder/qnn_model.h" + namespace onnxruntime { namespace qnn { @@ -51,9 +53,9 @@ Status GetMainContextNode(const std::vectorOpType(), "Should only filter in the EPContext node."); - NodeAttrHelper node_helper(*ep_context_node); + const Node& ep_context_node = *graph_viewer.Nodes().begin(); + ORT_RETURN_IF_NOT(EPCONTEXT_OP == ep_context_node.OpType(), "Should only filter in the EPContext node."); + NodeAttrHelper node_helper(ep_context_node); int64_t is_main_context = node_helper.Get(MAIN_CONTEXT, static_cast(0)); if (1 == is_main_context) { main_context_pos.push_back(static_cast(i)); @@ -68,17 +70,16 @@ Status CreateNodeArgs(const std::vector& names, const std::unordered_map& tensor_info_table, std::vector& node_args, onnxruntime::Graph& graph) { - using namespace ONNX_NAMESPACE; for (size_t i = 0; i < names.size(); ++i) { std::string name = names[i]; ORT_RETURN_IF(tensor_info_table.find(name) == tensor_info_table.end(), "Tensor name: ", name, " not found in tensor_info_table"); const OnnxTensorInfo& tensor_info = tensor_info_table.at(name); - TypeProto tensor_type; - tensor_type.mutable_tensor_type()->set_elem_type(tensor_info.data_type_); + std::unique_ptr tensor_type = Factory::Create(); + tensor_type->mutable_tensor_type()->set_elem_type(tensor_info.data_type_); for (size_t j = 0; j < tensor_info.shape_.size(); ++j) { - tensor_type.mutable_tensor_type()->mutable_shape()->add_dim()->set_dim_value(tensor_info.shape_[j]); + tensor_type->mutable_tensor_type()->mutable_shape()->add_dim()->set_dim_value(tensor_info.shape_[j]); } - auto& input_arg = 
graph.GetOrCreateNodeArg(name, &tensor_type); + auto& input_arg = graph.GetOrCreateNodeArg(name, tensor_type.get()); node_args.push_back(&input_arg); } return Status::OK(); @@ -161,8 +162,8 @@ Status TryGetMaxSpillFillSize(const std::vector(0)); if (max_size > max_spill_fill_size) { max_spill_fill_size = max_size; diff --git a/onnxruntime/core/providers/qnn/builder/onnx_ctx_model_helper.h b/onnxruntime/core/providers/qnn/builder/onnx_ctx_model_helper.h index 92c5391b40f09..3dfa0ae21001b 100644 --- a/onnxruntime/core/providers/qnn/builder/onnx_ctx_model_helper.h +++ b/onnxruntime/core/providers/qnn/builder/onnx_ctx_model_helper.h @@ -6,12 +6,8 @@ #include #include -#include "qnn_def.h" -#include "core/common/logging/logging.h" -#include "core/graph/graph_viewer.h" -#include "core/providers/shared/utils/utils.h" -#include "core/graph/model.h" -#include "core/framework/execution_provider.h" +#include "core/providers/qnn/builder/qnn_def.h" +#include "core/providers/qnn/ort_api.h" namespace onnxruntime { diff --git a/onnxruntime/core/providers/qnn/builder/op_builder.h b/onnxruntime/core/providers/qnn/builder/op_builder.h index 05398c3f22ea2..0846275496ebf 100644 --- a/onnxruntime/core/providers/qnn/builder/op_builder.h +++ b/onnxruntime/core/providers/qnn/builder/op_builder.h @@ -3,9 +3,7 @@ #pragma once -#include "core/graph/graph_viewer.h" -#include "core/framework/node_unit.h" -#include "core/providers/shared/utils/utils.h" +#include "core/providers/qnn/ort_api.h" namespace onnxruntime { namespace qnn { diff --git a/onnxruntime/core/providers/qnn/builder/op_builder_factory.cc b/onnxruntime/core/providers/qnn/builder/op_builder_factory.cc index e411c2a6bf536..3d66003fb2bca 100644 --- a/onnxruntime/core/providers/qnn/builder/op_builder_factory.cc +++ b/onnxruntime/core/providers/qnn/builder/op_builder_factory.cc @@ -5,8 +5,6 @@ #include #include -#include - #include "op_builder_factory.h" namespace onnxruntime { diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/argmax_min_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/argmax_min_op_builder.cc index c685fa065e2ba..e3a6141c292dd 100644 --- a/onnxruntime/core/providers/qnn/builder/opbuilder/argmax_min_op_builder.cc +++ b/onnxruntime/core/providers/qnn/builder/opbuilder/argmax_min_op_builder.cc @@ -1,14 +1,11 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. -#include "core/providers/common.h" -#include "core/providers/shared/utils/utils.h" -#include "core/framework/tensorprotoutils.h" +#include "core/providers/qnn/builder/opbuilder/base_op_builder.h" +#include "core/providers/qnn/builder/qnn_utils.h" #include "core/providers/qnn/builder/qnn_model_wrapper.h" #include "core/providers/qnn/builder/op_builder_factory.h" -#include "base_op_builder.h" - namespace onnxruntime { namespace qnn { diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/base_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/base_op_builder.cc index ed70111087e19..cd1ee72e00d4f 100644 --- a/onnxruntime/core/providers/qnn/builder/opbuilder/base_op_builder.cc +++ b/onnxruntime/core/providers/qnn/builder/opbuilder/base_op_builder.cc @@ -2,15 +2,9 @@ // Licensed under the MIT License. 
#include "core/providers/qnn/builder/opbuilder/base_op_builder.h" +#include #include "core/providers/qnn/builder/qnn_utils.h" -#include - -#include "core/providers/shared/utils/utils.h" -#include "core/framework/tensorprotoutils.h" -#include "core/providers/cpu/tensor/transpose.h" -#include "core/common/safeint.h" - namespace onnxruntime { namespace qnn { @@ -271,37 +265,189 @@ Status BaseOpBuilder::SetOutputQParamEqualToInputIfNearlyEqual(QnnModelWrapper& return Status::OK(); } -Status BaseOpBuilder::TransposeInitializer(const QnnModelWrapper& qnn_model_wrapper, - const onnx::TensorProto& initializer, - const std::vector& perm, - std::vector& transposed_data) const { - const DataTypeImpl* tensor_dtype = DataTypeImpl::TensorTypeFromONNXEnum(initializer.data_type())->GetElementType(); - const auto tensor_shape_dims = onnxruntime::utils::GetTensorShapeFromTensorProto(initializer); - TensorShape tensor_shape{tensor_shape_dims}; - AllocatorPtr cpu_allocator = std::make_shared(); - Tensor in_tensor = Tensor(tensor_dtype, tensor_shape, cpu_allocator); - - auto rank = perm.size(); - std::vector new_tensor_shape_dims; - std::vector permutations; - new_tensor_shape_dims.reserve(rank); - permutations.reserve(rank); - for (int64_t p : perm) { - permutations.push_back(p); - new_tensor_shape_dims.push_back(tensor_shape_dims[p]); +static Status GetTransposeStrides(const TensorShape& input_shape, + gsl::span perm, + gsl::span input_strides, + gsl::span output_strides) { + const size_t rank = input_shape.NumDimensions(); + ORT_RETURN_IF_NOT(perm.size() == rank, "Expected perm size of ", rank); + ORT_RETURN_IF_NOT(input_strides.size() == rank, "Expected input_strides size of ", rank); + ORT_RETURN_IF_NOT(output_strides.size() == rank, "Expected output_strides size of ", rank); + std::vector output_shape_dims(rank); + ORT_RETURN_IF_ERROR((qnn::utils::PermuteShape(input_shape.GetDims(), perm, output_shape_dims))); + const TensorShape output_shape = TensorShape::FromExistingBuffer(output_shape_dims); + + for (size_t i = 0; i < rank; ++i) { + int64_t stride = (i < rank - 1) ? input_shape.SizeFromDimension(i + 1) : 1; + ORT_RETURN_IF_NOT(stride > 0, "Expected positive shape dims when computing strides."); + input_strides[i] = static_cast(stride); + } + + for (size_t i = 0; i < rank; ++i) { + int64_t stride = (i < rank - 1) ? output_shape.SizeFromDimension(i + 1) : 1; + ORT_RETURN_IF_NOT(stride > 0, "Expected positive shape dims when computing strides."); + output_strides[i] = static_cast(stride); } - TensorShape new_tensor_shape(new_tensor_shape_dims); - Tensor out_tensor = Tensor(tensor_dtype, new_tensor_shape, cpu_allocator); - ORT_RETURN_IF_ERROR(onnxruntime::utils::TensorProtoToTensor( - Env::Default(), qnn_model_wrapper.GetGraphViewer().ModelPath(), initializer, in_tensor)); - ORT_RETURN_IF_ERROR(Transpose::DoTranspose(permutations, in_tensor, out_tensor)); - onnx::TensorProto new_tensor_proto = onnxruntime::utils::TensorToTensorProto(out_tensor, "test"); - ORT_RETURN_IF_ERROR(qnn_model_wrapper.UnpackInitializerData(new_tensor_proto, transposed_data)); + return Status::OK(); +} + +// Internal function to transpose data of rank 5 with the given permutation. +// Example: transpose input from either (N,C,H,W,D) or (C,N,H,W,D) to (H,W,D,C,N). 
+static Status TransposeDataRank5(const TensorShape& input_shape, + gsl::span perm, + size_t elem_byte_size, + gsl::span input_buffer, + gsl::span output_buffer) { + std::array input_strides = {}; + std::array output_strides = {}; + ORT_RETURN_IF_ERROR(GetTransposeStrides(input_shape, perm, input_strides, output_strides)); + + std::vector perm_inverse(perm.size()); + ORT_RETURN_IF_ERROR(qnn::utils::InvertPerm(perm, perm_inverse)); + + for (int64_t d0 = 0; d0 < input_shape[0]; ++d0) { + for (int64_t d1 = 0; d1 < input_shape[1]; ++d1) { + for (int64_t d2 = 0; d2 < input_shape[2]; ++d2) { + for (int64_t d3 = 0; d3 < input_shape[3]; ++d3) { + for (int64_t d4 = 0; d4 < input_shape[4]; ++d4) { + const size_t src_elem_index = ((d0 * input_strides[0]) + + (d1 * input_strides[1]) + + (d2 * input_strides[2]) + + (d3 * input_strides[3]) + + (d4 * input_strides[4])); + const size_t dst_elem_index = ((d0 * output_strides[perm_inverse[0]]) + + (d1 * output_strides[perm_inverse[1]]) + + (d2 * output_strides[perm_inverse[2]]) + + (d3 * output_strides[perm_inverse[3]]) + + (d4 * output_strides[perm_inverse[4]])); + + const size_t src_byte_index = src_elem_index * elem_byte_size; + const size_t dst_byte_index = dst_elem_index * elem_byte_size; + assert(src_byte_index < input_buffer.size()); + assert(dst_byte_index < output_buffer.size()); + + std::memcpy(&output_buffer[dst_byte_index], &input_buffer[src_byte_index], elem_byte_size); + } + } + } + } + } return Status::OK(); } +Status BaseOpBuilder::TwoDimensionTranspose(const QnnModelWrapper& qnn_model_wrapper, + std::vector& data_shape, + const onnx::TensorProto& initializer, + std::vector& transposed_data) const { + ORT_RETURN_IF_NOT(data_shape.size() == 2, "Expected shape of rank 2"); + + std::array perm = {1, 0}; + std::vector output_shape(data_shape.size()); + ORT_RETURN_IF_ERROR((qnn::utils::PermuteShape(data_shape, perm, output_shape))); + + auto onnx_type = static_cast(initializer.data_type()); + const size_t elem_byte_size = qnn::utils::GetElementSizeByType(onnx_type); + ORT_RETURN_IF_NOT(elem_byte_size != 0, "Can't get element byte size from given ONNX type"); + + std::vector input_buffer; + ORT_RETURN_IF_ERROR(qnn_model_wrapper.UnpackInitializerData(initializer, input_buffer)); + transposed_data.resize(input_buffer.size()); + + for (size_t row = 0; row < data_shape[0]; row++) { + for (size_t col = 0; col < data_shape[1]; col++) { + const size_t src_elem_index = (row * data_shape[1] + col); + const size_t dst_elem_index = (col * output_shape[1] + row); + const size_t src_byte_index = src_elem_index * elem_byte_size; + const size_t dst_byte_index = dst_elem_index * elem_byte_size; + assert(src_byte_index < input_buffer.size()); + assert(dst_byte_index < transposed_data.size()); + + std::memcpy(&transposed_data[dst_byte_index], &input_buffer[src_byte_index], elem_byte_size); + } + } + + data_shape = std::move(output_shape); // Update parameter with final transposed shape + return Status::OK(); +} + +Status BaseOpBuilder::TransposeFromNchwToHwcn(const QnnModelWrapper& qnn_model_wrapper, + const onnx::TensorProto& initializer, + std::vector& transposed_data, + bool is_3d) const { + auto onnx_type = static_cast(initializer.data_type()); + const size_t elem_byte_size = qnn::utils::GetElementSizeByType(onnx_type); + std::vector input_shape = qnn::utils::GetInitializerShape(initializer); + std::vector input_buffer; + ORT_RETURN_IF_ERROR(qnn_model_wrapper.UnpackInitializerData(initializer, input_buffer)); + transposed_data.resize(input_buffer.size()); + 
return TransposeFromNchwToHwcn(std::move(input_shape), elem_byte_size, input_buffer, transposed_data, is_3d); +} + +Status BaseOpBuilder::TransposeFromNchwToHwcn(std::vector&& original_input_shape_dims, + size_t elem_byte_size, + gsl::span input_buffer, + gsl::span output_buffer, + bool is_3d) const { + std::vector input_shape_dims = std::move(original_input_shape_dims); + const size_t rank = input_shape_dims.size(); + ORT_RETURN_IF_NOT((is_3d && rank == 5) || (!is_3d && rank == 4), "Only support input of rank 4 or 5 but got rank ", + rank); + ORT_RETURN_IF_NOT(output_buffer.size() == input_buffer.size(), + "Expected output buffer's size to equal the input buffer's size: ", + output_buffer.size(), " != ", input_buffer.size()); + ORT_RETURN_IF_NOT(elem_byte_size != 0, "Invalid element byte size due to potentially unsupported type"); + + if (!is_3d) { + input_shape_dims.push_back(1); // Make it 3D by making shape (N,C,H,W,1) + } + + return TransposeDataRank5(TensorShape::FromExistingBuffer(input_shape_dims), + nchw2hwcn_perm_3d, + elem_byte_size, + input_buffer, + output_buffer); +} + +Status BaseOpBuilder::TransposeFromCnhwToHwcn(const QnnModelWrapper& qnn_model_wrapper, + const onnx::TensorProto& initializer, + std::vector& transposed_data, + bool is_3d) const { + auto onnx_type = static_cast(initializer.data_type()); + const size_t elem_byte_size = qnn::utils::GetElementSizeByType(onnx_type); + std::vector input_shape = qnn::utils::GetInitializerShape(initializer); + std::vector input_buffer; + ORT_RETURN_IF_ERROR(qnn_model_wrapper.UnpackInitializerData(initializer, input_buffer)); + transposed_data.resize(input_buffer.size()); + return TransposeFromCnhwToHwcn(std::move(input_shape), elem_byte_size, input_buffer, transposed_data, is_3d); +} + +Status BaseOpBuilder::TransposeFromCnhwToHwcn(std::vector&& original_input_shape_dims, + size_t elem_byte_size, + gsl::span input_buffer, + gsl::span output_buffer, + bool is_3d) const { + std::vector input_shape_dims = std::move(original_input_shape_dims); + const size_t rank = input_shape_dims.size(); + ORT_RETURN_IF_NOT((is_3d && rank == 5) || (!is_3d && rank == 4), "Only support input of rank 4 or 5 but got rank ", + rank); + ORT_RETURN_IF_NOT(output_buffer.size() == input_buffer.size(), + "Expected output buffer's size to equal the input buffer's size: ", + output_buffer.size(), " != ", input_buffer.size()); + ORT_RETURN_IF_NOT(elem_byte_size != 0, "Invalid element byte size due to potentially unsupported type"); + + if (!is_3d) { + input_shape_dims.push_back(1); // Make it 3D by making shape (C,N,H,W,1) + } + + return TransposeDataRank5(TensorShape::FromExistingBuffer(input_shape_dims), + cnhw2hwcn_perm_3d, + elem_byte_size, + input_buffer, + output_buffer); +} + Status BaseOpBuilder::ProcessAxisAttribute(const QnnModelWrapper& qnn_model_wrapper, const NodeUnit& node_unit, Qnn_Scalar_t& axis_qnn_scalar, diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/base_op_builder.h b/onnxruntime/core/providers/qnn/builder/opbuilder/base_op_builder.h index 055c0f6ccf2fa..8e34b5d87cc68 100644 --- a/onnxruntime/core/providers/qnn/builder/opbuilder/base_op_builder.h +++ b/onnxruntime/core/providers/qnn/builder/opbuilder/base_op_builder.h @@ -3,11 +3,11 @@ #pragma once -#include "core/providers/shared/utils/utils.h" +#include "core/providers/qnn/ort_api.h" +#include "core/providers/qnn/builder/qnn_utils.h" #include "core/providers/qnn/builder/qnn_model_wrapper.h" #include "core/providers/qnn/builder/op_builder.h" #include 
"core/providers/qnn/builder/qnn_quant_params_wrapper.h" -#include "core/framework/allocator.h" #include "QnnOpDef.h" @@ -215,7 +215,8 @@ class BaseOpBuilder : public IOpBuilder { } // NCHW shape to channel last - Status NchwShapeToNhwc(const std::vector& nchw_shape, std::vector& nhwc_shape) const { + template + Status NchwShapeToNhwc(gsl::span nchw_shape, gsl::span nhwc_shape) const { ORT_RETURN_IF_NOT(nchw_shape.size() == 4, "shape should have 4 dimension NCHW."); nhwc_shape[0] = nchw_shape[0]; nhwc_shape[1] = nchw_shape[2]; @@ -226,7 +227,8 @@ class BaseOpBuilder : public IOpBuilder { } // NCHW shape to HWCN shape, required for Conv weight - Status NchwShapeToHwcn(const std::vector& nchw_shape, std::vector& hwcn_shape) const { + template + Status NchwShapeToHwcn(gsl::span nchw_shape, gsl::span hwcn_shape) const { if (nchw_shape.size() == 4) { hwcn_shape[0] = nchw_shape[2]; hwcn_shape[1] = nchw_shape[3]; @@ -246,7 +248,8 @@ class BaseOpBuilder : public IOpBuilder { } // CNHW shape to HWCN shape, required for Conv weight - Status CnhwShapeToHwcn(const std::vector& cnhw_shape, std::vector& hwcn_shape) const { + template + Status CnhwShapeToHwcn(gsl::span cnhw_shape, gsl::span hwcn_shape) const { if (cnhw_shape.size() == 4) { hwcn_shape[0] = cnhw_shape[2]; hwcn_shape[1] = cnhw_shape[3]; @@ -264,37 +267,31 @@ class BaseOpBuilder : public IOpBuilder { return Status::OK(); } - Status TransposeInitializer(const QnnModelWrapper& qnn_model_wrapper, - const onnx::TensorProto& initializer, - const std::vector& perm, - std::vector& transposed_data) const; Status TransposeFromNchwToHwcn(const QnnModelWrapper& qnn_model_wrapper, const onnx::TensorProto& initializer, std::vector& transposed_data, - bool is_3d = false) const { - auto& perm = is_3d ? nchw2hwcn_perm_3d : nchw2hwcn_perm; - return TransposeInitializer(qnn_model_wrapper, initializer, perm, transposed_data); - } + bool is_3d = false) const; + Status TransposeFromNchwToHwcn(std::vector&& input_shape_dims, + size_t elem_byte_size, + gsl::span input_buffer, + gsl::span output_buffer, + bool is_3d = false) const; Status TransposeFromCnhwToHwcn(const QnnModelWrapper& qnn_model_wrapper, const onnx::TensorProto& initializer, std::vector& transposed_data, - bool is_3d = false) const { - auto& perm = is_3d ? 
cnhw2hwcn_perm_3d : cnhw2hwcn_perm; - return TransposeInitializer(qnn_model_wrapper, initializer, perm, transposed_data); - } + bool is_3d = false) const; + Status TransposeFromCnhwToHwcn(std::vector&& input_shape_dims, + size_t elem_byte_size, + gsl::span input_buffer, + gsl::span output_buffer, + bool is_3d = false) const; Status TwoDimensionTranspose(const QnnModelWrapper& qnn_model_wrapper, std::vector& data_shape, const onnx::TensorProto& initializer, - std::vector& transposed_data) const { - auto tmp = data_shape[0]; - data_shape[0] = data_shape[1]; - data_shape[1] = tmp; - std::vector two_dim_trans_perm{1, 0}; - return TransposeInitializer(qnn_model_wrapper, initializer, two_dim_trans_perm, transposed_data); - } + std::vector& transposed_data) const; // Onnx Pads is [x1_begin, x2_begin, x1_end, x2_end], QNN requires [x1_begin, x1_end, x2_begin, x2_end] void ReArranagePads(std::vector& pads) const { diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/batch_norm_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/batch_norm_op_builder.cc index 07abcf1c7bf84..14f50fa78c1a9 100644 --- a/onnxruntime/core/providers/qnn/builder/opbuilder/batch_norm_op_builder.cc +++ b/onnxruntime/core/providers/qnn/builder/opbuilder/batch_norm_op_builder.cc @@ -5,16 +5,11 @@ #include #include -#include "core/providers/common.h" -#include "core/providers/shared/utils/utils.h" -#include "core/framework/float16.h" -#include "core/framework/tensorprotoutils.h" +#include "core/providers/qnn/builder/opbuilder/base_op_builder.h" #include "core/providers/qnn/builder/qnn_model_wrapper.h" #include "core/providers/qnn/builder/qnn_utils.h" #include "core/providers/qnn/builder/op_builder_factory.h" -#include "base_op_builder.h" - namespace onnxruntime { namespace qnn { class BatchNormOpBuilder : public BaseOpBuilder { diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/cast_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/cast_op_builder.cc index d3bdee02437e4..3139c05378171 100644 --- a/onnxruntime/core/providers/qnn/builder/opbuilder/cast_op_builder.cc +++ b/onnxruntime/core/providers/qnn/builder/opbuilder/cast_op_builder.cc @@ -4,12 +4,11 @@ #include #include +#include "core/providers/qnn/builder/opbuilder/base_op_builder.h" #include "core/providers/qnn/builder/qnn_model_wrapper.h" #include "core/providers/qnn/builder/op_builder_factory.h" #include "core/providers/qnn/builder/qnn_utils.h" -#include "base_op_builder.h" - namespace onnxruntime { namespace qnn { diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/clip_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/clip_op_builder.cc index e5dc4d04afefd..23b3dfb063ba2 100644 --- a/onnxruntime/core/providers/qnn/builder/opbuilder/clip_op_builder.cc +++ b/onnxruntime/core/providers/qnn/builder/opbuilder/clip_op_builder.cc @@ -4,14 +4,11 @@ #include #include -#include "core/providers/common.h" -#include "core/providers/shared/utils/utils.h" +#include "core/providers/qnn/builder/opbuilder/base_op_builder.h" #include "core/providers/qnn/builder/qnn_model_wrapper.h" #include "core/providers/qnn/builder/op_builder_factory.h" #include "core/providers/qnn/builder/qnn_utils.h" -#include "base_op_builder.h" - namespace onnxruntime { namespace qnn { class ClipOpBuilder : public BaseOpBuilder { diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/conv_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/conv_op_builder.cc index 12887f0fb72d6..0f92778252d48 100644 --- 
a/onnxruntime/core/providers/qnn/builder/opbuilder/conv_op_builder.cc +++ b/onnxruntime/core/providers/qnn/builder/opbuilder/conv_op_builder.cc @@ -1,16 +1,11 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. -#include "core/providers/common.h" -#include "core/providers/shared/utils/utils.h" -#include "core/framework/tensorprotoutils.h" +#include "core/providers/qnn/builder/opbuilder/base_op_builder.h" #include "core/providers/qnn/builder/qnn_model_wrapper.h" #include "core/providers/qnn/builder/op_builder_factory.h" -#include "core/common/safeint.h" #include "core/providers/qnn/builder/qnn_utils.h" -#include "base_op_builder.h" - namespace onnxruntime { namespace qnn { @@ -211,9 +206,9 @@ Status ConvOpBuilder::ProcessConv2D3DInputs(QnnModelWrapper& qnn_model_wrapper, // Change shape to HWCN, it could be initializer or normal input if (conv_type == OnnxConvType::kConv) { - ORT_RETURN_IF_ERROR(NchwShapeToHwcn(input_info.shape, actual_shape)); + ORT_RETURN_IF_ERROR(NchwShapeToHwcn(input_info.shape, actual_shape)); } else if (conv_type == OnnxConvType::kConvTranspose) { - ORT_RETURN_IF_ERROR(CnhwShapeToHwcn(input_info.shape, actual_shape)); + ORT_RETURN_IF_ERROR(CnhwShapeToHwcn(input_info.shape, actual_shape)); } else { return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "QNN EP: Unexpected convolution op type: ", node_unit.OpType().c_str()); } @@ -413,9 +408,9 @@ Status ConvOpBuilder::ProcessConv1DInputs(QnnModelWrapper& qnn_model_wrapper, // Create the final shape after the weights are transposed to HWCN. if (conv_type == OnnxConvType::kConv) { - ORT_RETURN_IF_ERROR(NchwShapeToHwcn(shape_2d, final_shape)); + ORT_RETURN_IF_ERROR(NchwShapeToHwcn(shape_2d, final_shape)); } else if (conv_type == OnnxConvType::kConvTranspose) { - ORT_RETURN_IF_ERROR(CnhwShapeToHwcn(shape_2d, final_shape)); + ORT_RETURN_IF_ERROR(CnhwShapeToHwcn(shape_2d, final_shape)); } else { return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "QNN EP: Unexpected convolution op type: ", node_unit.OpType().c_str()); } @@ -434,16 +429,6 @@ Status ConvOpBuilder::ProcessConv1DInputs(QnnModelWrapper& qnn_model_wrapper, return static_cast(dim); }); - const TensorShape tensor_shape = TensorShape::FromExistingBuffer(shape_2d_int64); // Does not own shape data. - const DataTypeImpl* tensor_dtype = DataTypeImpl::TensorTypeFromONNXEnum( - input_info.initializer_tensor->data_type()) - ->GetElementType(); - ORT_RETURN_IF_ERROR(qnn_model_wrapper.UnpackInitializerData(*input_info.initializer_tensor, unpacked_tensor)); - - Tensor tensor_2d(tensor_dtype, tensor_shape, unpacked_tensor.data(), OrtMemoryInfo{}); // Does not own data. - ONNX_NAMESPACE::TensorProto reshaped_initializer = onnxruntime::utils::TensorToTensorProto(tensor_2d, - reshape_output); - // The reshape (unsqueeze) may require us to shift the quant parameter's axis. if (input_info.quant_param.IsPerChannel()) { ORT_RETURN_IF_ERROR(input_info.quant_param.HandleUnsqueeze(input_info.shape, shape_2d)); @@ -452,10 +437,21 @@ Status ConvOpBuilder::ProcessConv1DInputs(QnnModelWrapper& qnn_model_wrapper, // // Get transposed initializer bytes. 
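// For orientation on the weight transposes used here: ONNX Conv weights are
// stored as NCHW = (C_out, C_in/group, kH, kW) and ConvTranspose weights as
// CNHW = (C_in, C_out/group, kH, kW), while QNN wants HWCN =
// (kH, kW, C_in, C_out). For example, a 3x3 Conv weight of shape (8,4,3,3)
// becomes (3,3,4,8) after the NCHW -> HWCN transpose.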
// + std::vector original_tensor_bytes; + ORT_RETURN_IF_ERROR(qnn_model_wrapper.UnpackInitializerData(*input_info.initializer_tensor, + original_tensor_bytes)); + unpacked_tensor.resize(original_tensor_bytes.size()); + const size_t elem_byte_size = qnn::utils::GetElementSizeByType( + static_cast(input_info.initializer_tensor->data_type())); + ORT_RETURN_IF(elem_byte_size == 0, "Can't get element byte size from given ONNX type for initializer ", + input1_name.c_str()); + if (conv_type == OnnxConvType::kConv) { - ORT_RETURN_IF_ERROR(TransposeFromNchwToHwcn(qnn_model_wrapper, reshaped_initializer, unpacked_tensor)); + ORT_RETURN_IF_ERROR(TransposeFromNchwToHwcn(std::move(shape_2d_int64), elem_byte_size, original_tensor_bytes, + unpacked_tensor, /*is_3d*/ false)); } else if (conv_type == OnnxConvType::kConvTranspose) { - ORT_RETURN_IF_ERROR(TransposeFromCnhwToHwcn(qnn_model_wrapper, reshaped_initializer, unpacked_tensor)); + ORT_RETURN_IF_ERROR(TransposeFromCnhwToHwcn(std::move(shape_2d_int64), elem_byte_size, original_tensor_bytes, + unpacked_tensor, /*is_3d*/ false)); } else { return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "QNN EP: Unexpected convolution op type: ", node_unit.OpType().c_str()); } diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/expand_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/expand_op_builder.cc index 64f676aaa9875..2bae3452199a5 100644 --- a/onnxruntime/core/providers/qnn/builder/opbuilder/expand_op_builder.cc +++ b/onnxruntime/core/providers/qnn/builder/opbuilder/expand_op_builder.cc @@ -1,14 +1,10 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. -#include "core/providers/common.h" -#include "core/providers/shared/utils/utils.h" +#include "core/providers/qnn/builder/opbuilder/base_op_builder.h" #include "core/providers/qnn/builder/qnn_model_wrapper.h" #include "core/providers/qnn/builder/op_builder_factory.h" #include "core/providers/qnn/builder/qnn_utils.h" -#include "core/common/safeint.h" - -#include "base_op_builder.h" namespace onnxruntime { namespace qnn { diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/gather_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/gather_op_builder.cc index 5549716751d4b..d25ec3f333bf1 100644 --- a/onnxruntime/core/providers/qnn/builder/opbuilder/gather_op_builder.cc +++ b/onnxruntime/core/providers/qnn/builder/opbuilder/gather_op_builder.cc @@ -2,14 +2,10 @@ // Licensed under the MIT License. #include -#include "core/providers/common.h" -#include "core/providers/shared/utils/utils.h" +#include "core/providers/qnn/builder/opbuilder/base_op_builder.h" #include "core/providers/qnn/builder/qnn_model_wrapper.h" #include "core/providers/qnn/builder/op_builder_factory.h" #include "core/providers/qnn/builder/qnn_utils.h" -#include "core/common/safeint.h" - -#include "base_op_builder.h" namespace onnxruntime { namespace qnn { diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/gemm_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/gemm_op_builder.cc index eeee26c177281..76bc766d2b04d 100644 --- a/onnxruntime/core/providers/qnn/builder/opbuilder/gemm_op_builder.cc +++ b/onnxruntime/core/providers/qnn/builder/opbuilder/gemm_op_builder.cc @@ -1,14 +1,10 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. 
-#include "core/providers/common.h" -#include "core/providers/shared/utils/utils.h" +#include "core/providers/qnn/builder/opbuilder/base_op_builder.h" #include "core/providers/qnn/builder/qnn_model_wrapper.h" #include "core/providers/qnn/builder/op_builder_factory.h" #include "core/providers/qnn/builder/qnn_utils.h" -#include "core/common/safeint.h" - -#include "base_op_builder.h" namespace onnxruntime { namespace qnn { diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/instance_norm_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/instance_norm_op_builder.cc index 4b8d079c0062a..d77d9534bf1c4 100644 --- a/onnxruntime/core/providers/qnn/builder/opbuilder/instance_norm_op_builder.cc +++ b/onnxruntime/core/providers/qnn/builder/opbuilder/instance_norm_op_builder.cc @@ -1,16 +1,10 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. -#include "core/providers/common.h" -#include "core/providers/shared/utils/utils.h" -#include "core/framework/tensorprotoutils.h" +#include "core/providers/qnn/builder/opbuilder/base_op_builder.h" #include "core/providers/qnn/builder/qnn_utils.h" #include "core/providers/qnn/builder/qnn_model_wrapper.h" #include "core/providers/qnn/builder/op_builder_factory.h" -#include "core/common/safeint.h" -#include "onnx/defs/data_type_utils.h" - -#include "base_op_builder.h" namespace onnxruntime { namespace qnn { diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/layer_norm_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/layer_norm_op_builder.cc index d1a0e88686f39..fc92f42b376bc 100644 --- a/onnxruntime/core/providers/qnn/builder/opbuilder/layer_norm_op_builder.cc +++ b/onnxruntime/core/providers/qnn/builder/opbuilder/layer_norm_op_builder.cc @@ -2,16 +2,10 @@ // Licensed under the MIT License. #include -#include "core/providers/common.h" -#include "core/providers/shared/utils/utils.h" -#include "core/framework/tensorprotoutils.h" +#include "core/providers/qnn/builder/opbuilder/base_op_builder.h" #include "core/providers/qnn/builder/qnn_utils.h" #include "core/providers/qnn/builder/qnn_model_wrapper.h" #include "core/providers/qnn/builder/op_builder_factory.h" -#include "core/common/safeint.h" -#include "onnx/defs/data_type_utils.h" - -#include "base_op_builder.h" namespace onnxruntime { namespace qnn { diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/lrn_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/lrn_op_builder.cc index 2f66069b6609e..3c9bdf0e7f8aa 100644 --- a/onnxruntime/core/providers/qnn/builder/opbuilder/lrn_op_builder.cc +++ b/onnxruntime/core/providers/qnn/builder/opbuilder/lrn_op_builder.cc @@ -2,11 +2,9 @@ // Licensed under the MIT License. #include "core/providers/qnn/builder/opbuilder/base_op_builder.h" -#include "core/providers/shared/utils/utils.h" +#include "core/providers/qnn/builder/qnn_utils.h" #include "core/providers/qnn/builder/qnn_model_wrapper.h" #include "core/providers/qnn/builder/op_builder_factory.h" -#include "core/common/safeint.h" -#include "onnx/defs/data_type_utils.h" #include "QnnOpDef.h" // From QNN SDK: contains QNN constants (e.g., op names, param values). 
diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/matmul_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/matmul_op_builder.cc index bac08f1993f47..5a158af8d542a 100644 --- a/onnxruntime/core/providers/qnn/builder/opbuilder/matmul_op_builder.cc +++ b/onnxruntime/core/providers/qnn/builder/opbuilder/matmul_op_builder.cc @@ -1,23 +1,22 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. -#include "core/common/safeint.h" -#include "core/providers/common.h" +#include "core/providers/qnn/ort_api.h" #include "core/providers/qnn/builder/op_builder_factory.h" #include "core/providers/qnn/builder/opbuilder/base_op_builder.h" #include "core/providers/qnn/builder/qnn_model_wrapper.h" #include "core/providers/qnn/builder/qnn_utils.h" -#include "core/providers/shared/utils/utils.h" namespace onnxruntime { namespace qnn { /** - * ONNX's MatMul supports 1D tensor as input on both size, but neither QNN's MatMul nor FullyConnected supports it. - * So we need to add Reshape Ops if necessary. + * An ONNX MatMul can be translated to either a QNN MatMul or a QNN FullyConnected. + * ONNX's MatMul supports inputs of rank 1, but neither QNN's MatMul nor FullyConnected supports two rank 1 inputs. + * So, we need to add Reshape Ops if necessary. * In two cases, FullyConnected (input_1's shape is [n, k]) is used instead of MatMul without extra Transpose Op: - * 1. input_1 is 2D initializer. - * 2. input_1 is 1D tensor. + * 1. input_1 is a rank 2 initializer. + * 2. input_1 is a rank 1 tensor. */ class MatMulOpBuilder : public BaseOpBuilder { public: @@ -31,109 +30,207 @@ class MatMulOpBuilder : public BaseOpBuilder { Status ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_wrapper, const NodeUnit& node_unit, std::vector&& input_names, const logging::Logger& logger, bool do_op_validation) const override ORT_MUST_USE_RESULT; + + private: + Status ProcessInputsForQnnMatMul(QnnModelWrapper& qnn_model_wrapper, + const NodeUnit& node_unit, + const TensorInfo& input_info_0, + const TensorInfo& input_info_1, + const logging::Logger& logger, + std::vector& input_names, + bool do_op_validation) const ORT_MUST_USE_RESULT; + Status ProcessInputsForQnnFullyConnected(QnnModelWrapper& qnn_model_wrapper, + const NodeUnit& node_unit, + const TensorInfo& input_info_0, + const TensorInfo& input_info_1, + const logging::Logger& logger, + std::vector& input_names, + bool do_op_validation) const ORT_MUST_USE_RESULT; }; namespace { +// Inserts a QNN Convert operator to convert from one quantization type (e.g., uint16) to another (e.g., uint8). +Status InsertConvertOp(QnnModelWrapper& qnn_model_wrapper, + const std::string& convert_input_name, + const std::string& convert_output_name, + Qnn_DataType_t input_qnn_data_type, + Qnn_DataType_t output_qnn_data_type, + int32_t input_offset, + float input_scale, + const std::vector& output_shape, + bool do_op_validation) { + // Assume input is already handled.
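// A worked example of the requantization below, assuming the usual affine
// mapping value = scale * (quant - offset): converting a uint16 input with
// input_scale = 1e-4 and input_offset = 0 to uint8 gives
//   value_min = 1e-4 * (0     - 0) = 0.0
//   value_max = 1e-4 * (65535 - 0) = 6.5535
// after which GetQuantParams solves for the 8-bit range, roughly
//   scale = (6.5535 - 0.0) / 255 ~= 0.0257, offset = 0.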
+ float qmin = 0.0f; + float qmax = 255.0f; + ORT_RETURN_IF_ERROR(qnn::utils::GetQminQmax(input_qnn_data_type, qmin, qmax)); + double value_min = qnn::utils::Dequantize(input_offset, input_scale, qmin); + double value_max = qnn::utils::Dequantize(input_offset, input_scale, qmax); + float scale = 0.0f; + int32_t offset = 0; + ORT_RETURN_IF_ERROR(qnn::utils::GetQuantParams(static_cast(value_min), + static_cast(value_max), + output_qnn_data_type, + scale, + offset)); + + std::vector output_shape_copy = output_shape; + QnnTensorWrapper convert_output_tensorwrapper(convert_output_name, + QNN_TENSOR_TYPE_NATIVE, + output_qnn_data_type, + QnnQuantParamsWrapper(scale, offset), + std::move(output_shape_copy)); + ORT_RETURN_IF_NOT(qnn_model_wrapper.AddTensorWrapper(std::move(convert_output_tensorwrapper)), "Failed to add tensor."); + + ORT_RETURN_IF_NOT(qnn_model_wrapper.CreateQnnNode(convert_output_name, + QNN_OP_PACKAGE_NAME_QTI_AISW, + "Convert", + {convert_input_name}, + {convert_output_name}, + {}, + do_op_validation), + "Failed to add node."); + return Status::OK(); +} + +inline bool IsQuant16bit(Qnn_DataType_t qnn_data_type) { + return qnn_data_type == QNN_DATATYPE_UFIXED_POINT_16 || qnn_data_type == QNN_DATATYPE_SFIXED_POINT_16; +} + Status CheckInputs(const QnnModelWrapper& qnn_model_wrapper, const NodeUnitIODef& input_def_0, const NodeUnitIODef& input_def_1, TensorInfo& input_info_0, TensorInfo& input_info_1, bool& use_fully_connected) { ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetTensorInfo(input_def_0, input_info_0)); ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetTensorInfo(input_def_1, input_info_1)); - // Use FullyConnected if 2nd input is 2D initializer or 1D tensor. +#if QNN_API_VERSION_MAJOR >= 2 && QNN_API_VERSION_MINOR <= 20 + // Validation crashes when using QNN FullyConnected in QNN SDK versions 2.26 - 2.27. + // Just use QNN MatMul for these older QNN SDK versions. + use_fully_connected = false; +#else + // Use FullyConnected if 2nd input is a rank 2 initializer or a rank 1 tensor. // FullyConnected cannot pass the Op validation if keep_dims is true, so if input_0 is per-channel quantized tensor - // with rank > 2, it's not easy to set the quantization parameters for the output reshaped 2D tensor. + // with rank > 2, it's not easy to set the quantization parameters for the output reshaped rank 2 tensor. // In this case, we will not use FullyConnected. use_fully_connected = (input_info_1.shape.size() == 2 && input_info_1.is_initializer) || input_info_1.shape.size() == 1; use_fully_connected = use_fully_connected && !(input_info_0.quant_param.IsPerChannel() && input_info_0.shape.size() > 2); + // Don't use FullyConnected if both inputs are dynamic and uint16 (quantized) + use_fully_connected = use_fully_connected && !(IsQuant16bit(input_info_0.qnn_data_type) && + !input_info_0.is_initializer && + IsQuant16bit(input_info_1.qnn_data_type) && + !input_info_1.is_initializer); +#endif return Status::OK(); } -} // namespace +// Process input[0] for ONNX MatMul that can be translated to either a QNN MatMul or a QNN FullyConnected.
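+// If input[0] has rank 1 (shape [k]), it is reshaped to [1, k]: an initializer is unpacked and re-added
+// as a static tensor with the adjusted shape and quantization parameters, while a dynamic tensor gets an
+// explicit Reshape node. Inputs of rank 2 or higher are added unchanged.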
+Status ProcessInput0(QnnModelWrapper& qnn_model_wrapper, + const TensorInfo& input_0_info, + const std::string& original_input_0_name, + std::vector& input_names, + const logging::Logger& logger, + bool do_op_validation) { + bool reshape_input_0 = input_0_info.shape.size() == 1; + std::string actual_input_0_name = original_input_0_name; -Status MatMulOpBuilder::ProcessInputs(QnnModelWrapper& qnn_model_wrapper, const NodeUnit& node_unit, - const logging::Logger& logger, std::vector& input_names, - bool do_op_validation) const { - const auto& inputs = node_unit.Inputs(); - TensorInfo input_info_0{}; - TensorInfo input_info_1{}; - bool use_fully_connected = false; - ORT_RETURN_IF_ERROR( - CheckInputs(qnn_model_wrapper, inputs[0], inputs[1], input_info_0, input_info_1, use_fully_connected)); - bool reshape_input_0 = input_info_0.shape.size() == 1; - bool reshape_input_1 = input_info_1.shape.size() == 1; - - // Process input 0. - const std::string& org_input_0_name = inputs[0].node_arg.Name(); - std::string input_0_name = org_input_0_name; if (reshape_input_0) { - input_0_name = org_input_0_name + "_ort_qnn_ep_reshape"; - std::vector shape_2d{1, input_info_0.shape[0]}; - QnnQuantParamsWrapper quant_param_2d = input_info_0.quant_param.Copy(); - ORT_RETURN_IF_ERROR(quant_param_2d.HandleUnsqueeze(input_info_0.shape, shape_2d)); + actual_input_0_name = original_input_0_name + "_ort_qnn_ep_reshape"; + std::vector shape_2d{1, input_0_info.shape[0]}; + QnnQuantParamsWrapper quant_param_2d = input_0_info.quant_param.Copy(); + ORT_RETURN_IF_ERROR(quant_param_2d.HandleUnsqueeze(input_0_info.shape, shape_2d)); // If input_0 is initializer, unpack it and add the tensor with new quantization parameter and shape. // Otherwise, add a Reshape node. - if (input_info_0.is_initializer) { + if (input_0_info.is_initializer) { std::vector unpacked_tensor; - ORT_RETURN_IF_ERROR(qnn_model_wrapper.UnpackInitializerData(*input_info_0.initializer_tensor, unpacked_tensor)); - Qnn_TensorType_t tensor_type = qnn_model_wrapper.GetTensorType(org_input_0_name); - QnnTensorWrapper input_tensorwrapper(input_0_name, tensor_type, input_info_0.qnn_data_type, + ORT_RETURN_IF_ERROR(qnn_model_wrapper.UnpackInitializerData(*input_0_info.initializer_tensor, unpacked_tensor)); + QnnTensorWrapper input_tensorwrapper(actual_input_0_name, QNN_TENSOR_TYPE_STATIC, input_0_info.qnn_data_type, std::move(quant_param_2d), std::move(shape_2d), std::move(unpacked_tensor)); ORT_RETURN_IF_NOT(qnn_model_wrapper.AddTensorWrapper(std::move(input_tensorwrapper)), "Failed to add tensor."); } else { - ORT_RETURN_IF_ERROR(qnn_model_wrapper.AddReshapeNode(org_input_0_name, input_0_name, input_info_0.shape, shape_2d, - input_info_0.qnn_data_type, input_info_0.quant_param, + ORT_RETURN_IF_ERROR(qnn_model_wrapper.AddReshapeNode(original_input_0_name, actual_input_0_name, + input_0_info.shape, shape_2d, + input_0_info.qnn_data_type, input_0_info.quant_param, quant_param_2d, do_op_validation, - qnn_model_wrapper.IsGraphInput(org_input_0_name), false)); + qnn_model_wrapper.IsGraphInput(original_input_0_name), false)); } } else { - if (qnn_model_wrapper.IsQnnTensorWrapperExist(input_0_name)) { - LOGS(logger, VERBOSE) << "Tensor already added, skip it: " << input_0_name; + if (qnn_model_wrapper.IsQnnTensorWrapperExist(actual_input_0_name)) { + LOGS(logger, VERBOSE) << "Tensor already added, skip it: " << actual_input_0_name; } else { QnnTensorWrapper input_0_tensor; - ORT_RETURN_IF_ERROR(qnn_model_wrapper.MakeTensorWrapper(inputs[0], input_0_tensor)); + 
ORT_RETURN_IF_ERROR(qnn_model_wrapper.MakeTensorWrapper(input_0_info, actual_input_0_name, input_0_tensor)); ORT_RETURN_IF_NOT(qnn_model_wrapper.AddTensorWrapper(std::move(input_0_tensor)), "Failed to add tensor."); } } - input_names.emplace_back(input_0_name); + input_names.emplace_back(actual_input_0_name); + + return Status::OK(); +} +} // namespace + +// Process operator inputs. Dispatches to other processing functions depending on whether we're +// translating an ONNX MatMul to a QNN MatMul or a QNN FullyConnected. +Status MatMulOpBuilder::ProcessInputs(QnnModelWrapper& qnn_model_wrapper, const NodeUnit& node_unit, + const logging::Logger& logger, std::vector& input_names, + bool do_op_validation) const { + const auto& inputs = node_unit.Inputs(); + TensorInfo input_info_0{}; + TensorInfo input_info_1{}; + bool use_fully_connected = false; + ORT_RETURN_IF_ERROR( + CheckInputs(qnn_model_wrapper, inputs[0], inputs[1], input_info_0, input_info_1, use_fully_connected)); + + if (use_fully_connected) { + return ProcessInputsForQnnFullyConnected(qnn_model_wrapper, + node_unit, + input_info_0, + input_info_1, + logger, + input_names, + do_op_validation); + } + return ProcessInputsForQnnMatMul(qnn_model_wrapper, + node_unit, + input_info_0, + input_info_1, + logger, + input_names, + do_op_validation); +} + +Status MatMulOpBuilder::ProcessInputsForQnnMatMul(QnnModelWrapper& qnn_model_wrapper, + const NodeUnit& node_unit, + const TensorInfo& input_info_0, + const TensorInfo& input_info_1, + const logging::Logger& logger, + std::vector& input_names, + bool do_op_validation) const { + const auto& inputs = node_unit.Inputs(); + const bool reshape_input_1 = input_info_1.shape.size() == 1; + + const std::string& org_input_0_name = inputs[0].node_arg.Name(); + ORT_RETURN_IF_ERROR(ProcessInput0(qnn_model_wrapper, input_info_0, org_input_0_name, input_names, + logger, do_op_validation)); // Process input 1. const std::string& org_input_1_name = inputs[1].node_arg.Name(); std::string input_1_name = org_input_1_name; - if (reshape_input_1 || use_fully_connected) { + if (reshape_input_1) { + // Input[1] is a rank 1 tensor that needs to be reshaped. std::vector shape_2d; QnnQuantParamsWrapper quant_param_2d = input_info_1.quant_param.Copy(); - if (reshape_input_1) { - // Input is 1D tensor. - input_1_name = org_input_1_name + "_ort_qnn_ep_reshape"; - if (use_fully_connected) { - // FullyConnected requires input_1's shape to be [n, k]. - shape_2d = {1, input_info_1.shape[0]}; - } else { - shape_2d = {input_info_1.shape[0], 1}; - } - ORT_RETURN_IF_ERROR(quant_param_2d.HandleUnsqueeze(input_info_1.shape, shape_2d)); - } else { - input_1_name = org_input_1_name + "_ort_qnn_ep_transpose"; - shape_2d = {input_info_1.shape[1], input_info_1.shape[0]}; - ORT_RETURN_IF_ERROR(quant_param_2d.HandleTranspose(std::vector({1, 0}))); - } + input_1_name = org_input_1_name + "_ort_qnn_ep_reshape"; + shape_2d = {input_info_1.shape[0], 1}; + ORT_RETURN_IF_ERROR(quant_param_2d.HandleUnsqueeze(input_info_1.shape, shape_2d)); // If input_1 is initializer, unpack it and add the tensor with new quantization parameter and shape. // Otherwise, add a Reshape node. if (input_info_1.is_initializer) { std::vector unpacked_tensor; - if (use_fully_connected && !reshape_input_1) { - // 2D initializer should be transposed to [n, k]. 
- ORT_RETURN_IF_ERROR(TwoDimensionTranspose(qnn_model_wrapper, input_info_1.shape, - *input_info_1.initializer_tensor, unpacked_tensor)); - } else { - ORT_RETURN_IF_ERROR(qnn_model_wrapper.UnpackInitializerData(*input_info_1.initializer_tensor, unpacked_tensor)); - } + ORT_RETURN_IF_ERROR(qnn_model_wrapper.UnpackInitializerData(*input_info_1.initializer_tensor, unpacked_tensor)); Qnn_TensorType_t tensor_type = qnn_model_wrapper.GetTensorType(org_input_1_name); QnnTensorWrapper input_tensorwrapper(input_1_name, tensor_type, input_info_1.qnn_data_type, @@ -156,6 +253,108 @@ Status MatMulOpBuilder::ProcessInputs(QnnModelWrapper& qnn_model_wrapper, const } input_names.emplace_back(input_1_name); + // Workaround that inserts a QNN Convert op before input[1] (converts from quantized uint16 to quantized uint8) + // to avoid a QNN validation failure. + // + // QNN graph WITHOUT workaround (fails validation): + // input_0_uint16 ---> MatMul ---> output_uint16 + // ^ + // | + // input_1_uint16 -----+ + // + // QNN graph WITH workaround (passes validation): + // input_0_uint16 ----------------------> MatMul ---> output_uint16 + // ^ + // | + // input_1_uint16 --> Convert(to uint8) --+ + if (!input_info_0.is_initializer && !input_info_1.is_initializer && + input_info_0.qnn_data_type == input_info_1.qnn_data_type && + input_info_0.qnn_data_type == QNN_DATATYPE_UFIXED_POINT_16) { + ORT_RETURN_IF_NOT(input_info_1.quant_param.IsPerTensor(), + "MatMul's activation inputs only support per-tensor quantization"); + const Qnn_QuantizeParams_t& quant_param = input_info_1.quant_param.Get(); + // insert Convert op after input1 + std::string convert_input_name = input_names.back(); + input_names.pop_back(); + const std::string& matmul_output_name = node_unit.Outputs()[0].node_arg.Name(); + std::string convert_output_name = convert_input_name + "_convert_" + matmul_output_name; + std::vector input_1_shape = input_info_1.shape; + if (reshape_input_1) { + input_1_shape = {input_info_1.shape[0], 1}; + } + ORT_RETURN_IF_ERROR(InsertConvertOp(qnn_model_wrapper, + convert_input_name, + convert_output_name, + input_info_1.qnn_data_type, + QNN_DATATYPE_UFIXED_POINT_8, + quant_param.scaleOffsetEncoding.offset, + quant_param.scaleOffsetEncoding.scale, + input_1_shape, + do_op_validation)); + input_names.push_back(convert_output_name); + } + return Status::OK(); +} + +Status MatMulOpBuilder::ProcessInputsForQnnFullyConnected(QnnModelWrapper& qnn_model_wrapper, + const NodeUnit& node_unit, + const TensorInfo& input_info_0, + const TensorInfo& input_info_1, + const logging::Logger& logger, + std::vector& input_names, + bool do_op_validation) const { + const auto& inputs = node_unit.Inputs(); + const bool reshape_input_1 = input_info_1.shape.size() == 1; + + const std::string& org_input_0_name = inputs[0].node_arg.Name(); + ORT_RETURN_IF_ERROR(ProcessInput0(qnn_model_wrapper, input_info_0, org_input_0_name, input_names, + logger, do_op_validation)); + + // Process input 1. + const std::string& org_input_1_name = inputs[1].node_arg.Name(); + std::string input_1_name = org_input_1_name; + std::vector shape_2d; + QnnQuantParamsWrapper quant_param_2d = input_info_1.quant_param.Copy(); + if (reshape_input_1) { + // Input[1] is a rank 1 tensor that needs to be reshaped. + input_1_name = org_input_1_name + "_ort_qnn_ep_reshape"; + + // FullyConnected requires input_1's shape to be [n, k]. 
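+ // A rank 1 input of length k is therefore lifted to [1, k], i.e. a single output unit (n = 1).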
+ shape_2d = {1, input_info_1.shape[0]}; + ORT_RETURN_IF_ERROR(quant_param_2d.HandleUnsqueeze(input_info_1.shape, shape_2d)); + } else { + assert(input_info_1.shape.size() == 2); + input_1_name = org_input_1_name + "_ort_qnn_ep_transpose"; + shape_2d = {input_info_1.shape[1], input_info_1.shape[0]}; + ORT_RETURN_IF_ERROR(quant_param_2d.HandleTranspose(std::vector({1, 0}))); + } + + // If input_1 is initializer, unpack it and add the tensor with new quantization parameter and shape. + // Otherwise, add a Reshape node. + if (input_info_1.is_initializer) { + std::vector unpacked_tensor; + if (!reshape_input_1) { + // 2D initializer should be transposed to [n, k]. + std::vector original_shape_copy = input_info_1.shape; + ORT_RETURN_IF_ERROR(TwoDimensionTranspose(qnn_model_wrapper, + original_shape_copy, // Will be modified to new shape (unnecessary) + *input_info_1.initializer_tensor, + unpacked_tensor)); + } else { + ORT_RETURN_IF_ERROR(qnn_model_wrapper.UnpackInitializerData(*input_info_1.initializer_tensor, unpacked_tensor)); + } + + Qnn_TensorType_t tensor_type = qnn_model_wrapper.GetTensorType(org_input_1_name); + QnnTensorWrapper input_tensorwrapper(input_1_name, tensor_type, input_info_1.qnn_data_type, + std::move(quant_param_2d), std::move(shape_2d), std::move(unpacked_tensor)); + ORT_RETURN_IF_NOT(qnn_model_wrapper.AddTensorWrapper(std::move(input_tensorwrapper)), "Failed to add tensor."); + } else { + ORT_RETURN_IF_ERROR(qnn_model_wrapper.AddReshapeNode(org_input_1_name, input_1_name, input_info_1.shape, shape_2d, + input_info_1.qnn_data_type, input_info_1.quant_param, + quant_param_2d, do_op_validation, + qnn_model_wrapper.IsGraphInput(org_input_1_name), false)); + } + input_names.emplace_back(input_1_name); return Status::OK(); } @@ -172,6 +371,24 @@ Status MatMulOpBuilder::ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_w bool reshape_input_1 = input_info_1.shape.size() == 1; bool reshape_output = reshape_input_0 || reshape_input_1 || (use_fully_connected && input_info_0.shape.size() > 2); + // For QNN MatMul: set the input transpose parameters to their default values of 0. These parameters should be + // optional, but older versions of QNN SDK failed validation if not explicitly provided. + std::vector param_tensor_names; + if (!use_fully_connected) { + Qnn_Scalar_t scalar_param = QNN_SCALAR_INIT; + scalar_param.dataType = QNN_DATATYPE_BOOL_8; + scalar_param.bool8Value = 0; + QnnParamWrapper transpose_in0_param(node_unit.Index(), node_unit.Name(), QNN_OP_MAT_MUL_PARAM_TRANSPOSE_IN0, + scalar_param); + param_tensor_names.push_back(transpose_in0_param.GetParamTensorName()); + qnn_model_wrapper.AddParamWrapper(std::move(transpose_in0_param)); + + QnnParamWrapper transpose_in1_param(node_unit.Index(), node_unit.Name(), QNN_OP_MAT_MUL_PARAM_TRANSPOSE_IN1, + scalar_param); + param_tensor_names.push_back(transpose_in1_param.GetParamTensorName()); + qnn_model_wrapper.AddParamWrapper(std::move(transpose_in1_param)); + } + const std::string& org_output_name = node_unit.Outputs()[0].node_arg.Name(); std::string op_output_name = org_output_name; TensorInfo output_info{}; @@ -207,7 +424,8 @@ Status MatMulOpBuilder::ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_w "Failed to add output tensor."); ORT_RETURN_IF_NOT(qnn_model_wrapper.CreateQnnNode(utils::GetNodeName(node_unit), QNN_OP_PACKAGE_NAME_QTI_AISW, use_fully_connected ? 
QNN_OP_FULLY_CONNECTED : QNN_OP_MAT_MUL, - std::move(input_names), {op_output_name}, {}, do_op_validation), + std::move(input_names), {op_output_name}, + std::move(param_tensor_names), do_op_validation), "Failed to add fused Matmul node."); if (reshape_output) { diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/pad_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/pad_op_builder.cc index 5fc6d42a8a179..40e0ccdd4a6dd 100644 --- a/onnxruntime/core/providers/qnn/builder/opbuilder/pad_op_builder.cc +++ b/onnxruntime/core/providers/qnn/builder/opbuilder/pad_op_builder.cc @@ -1,15 +1,9 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. -#include "core/providers/common.h" -#include "core/providers/shared/utils/utils.h" +#include "core/providers/qnn/builder/opbuilder/base_op_builder.h" #include "core/providers/qnn/builder/qnn_model_wrapper.h" #include "core/providers/qnn/builder/op_builder_factory.h" -#include "core/providers/cpu/tensor/slice_helper.h" -#include "core/providers/qnn/builder/op_builder_factory.h" -#include "core/common/safeint.h" - -#include "core/providers/qnn/builder/opbuilder/base_op_builder.h" #include "core/providers/qnn/builder/qnn_utils.h" namespace onnxruntime { diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/pool_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/pool_op_builder.cc index ef1990ad8e69a..795886fa255ed 100644 --- a/onnxruntime/core/providers/qnn/builder/opbuilder/pool_op_builder.cc +++ b/onnxruntime/core/providers/qnn/builder/opbuilder/pool_op_builder.cc @@ -1,16 +1,10 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. -#include "core/providers/common.h" -#include "core/providers/shared/utils/utils.h" -#include "core/framework/tensorprotoutils.h" +#include "core/providers/qnn/builder/opbuilder/base_op_builder.h" #include "core/providers/qnn/builder/qnn_utils.h" #include "core/providers/qnn/builder/qnn_model_wrapper.h" #include "core/providers/qnn/builder/op_builder_factory.h" -#include "core/common/safeint.h" -#include "onnx/defs/data_type_utils.h" - -#include "base_op_builder.h" namespace onnxruntime { namespace qnn { diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/reduce_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/reduce_op_builder.cc index 77bc58bd6f833..a98110bc96fb2 100644 --- a/onnxruntime/core/providers/qnn/builder/opbuilder/reduce_op_builder.cc +++ b/onnxruntime/core/providers/qnn/builder/opbuilder/reduce_op_builder.cc @@ -2,16 +2,13 @@ // Licensed under the MIT License. #include -#include #include +#include +#include #include -#include "core/common/safeint.h" -#include "onnx/defs/data_type_utils.h" -#include "core/providers/common.h" -#include "core/framework/endian_utils.h" -#include "core/providers/shared/utils/utils.h" #include "core/providers/qnn/builder/opbuilder/base_op_builder.h" +#include "core/providers/qnn/ort_api.h" #include "core/providers/qnn/builder/op_builder_factory.h" #include "core/providers/qnn/builder/qnn_model_wrapper.h" #include "core/providers/qnn/builder/qnn_utils.h" @@ -71,7 +68,7 @@ class ReduceOpBuilder : public BaseOpBuilder { using AxesQnnIntType = uint32_t; Status GetAxesSet(QnnModelWrapper& qnn_model_wrapper, const NodeUnit& node_unit, - InlinedHashSet& axes_set) const; + std::set& axes_set) const; // Maps an operator type to the opset in which "axes" became an input instead of an attribute. 
static const std::array opset_with_axes_as_input; @@ -87,7 +84,7 @@ const std::array ReduceOpBuilder::opset_with_axes_as_ }; Status ReduceOpBuilder::GetAxesSet(QnnModelWrapper& qnn_model_wrapper, const NodeUnit& node_unit, - InlinedHashSet& axes_set) const { + std::set& axes_set) const { ReduceOpType reduce_op_type = GetReduceOpType(node_unit.OpType()); if (reduce_op_type == ReduceOpType::REDUCE_OP_TYPE_UNKNOWN) { return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "QNN EP: Unknown reduce operator ", node_unit.OpType()); @@ -146,10 +143,7 @@ Status ReduceOpBuilder::GetAxesSet(QnnModelWrapper& qnn_model_wrapper, const Nod auto src_span = gsl::make_span(axes_bytes.data(), axes_bytes.size()); auto dst_span = gsl::make_span(reduce_axes.data(), reduce_axes.size()); - // Copy initializer bytes (stored in little-endian order) to vector of int64_t. - // ReadLittleEndian returns a status error if the source and destination spans do not have - // matching byte sizes. - ORT_RETURN_IF_ERROR(onnxruntime::utils::ReadLittleEndian(src_span, dst_span)); + std::memcpy(dst_span.data(), src_span.data(), src_span.size_bytes()); } } @@ -218,7 +212,7 @@ Status ReduceOpBuilder::ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_w // // Handle axes param. // - InlinedHashSet axes_set; + std::set axes_set; ORT_RETURN_IF_ERROR(GetAxesSet(qnn_model_wrapper, node_unit, axes_set)); const size_t num_axes = axes_set.size(); diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/reshape_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/reshape_op_builder.cc index b6f414da950d8..6fd67a72b64e1 100644 --- a/onnxruntime/core/providers/qnn/builder/opbuilder/reshape_op_builder.cc +++ b/onnxruntime/core/providers/qnn/builder/opbuilder/reshape_op_builder.cc @@ -1,15 +1,11 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. 
-#include "core/providers/common.h" -#include "core/providers/shared/utils/utils.h" -#include "core/framework/tensorprotoutils.h" +#include "core/providers/qnn/builder/opbuilder/base_op_builder.h" #include "core/providers/qnn/builder/qnn_utils.h" #include "core/providers/qnn/builder/qnn_model_wrapper.h" #include "core/providers/qnn/builder/op_builder_factory.h" -#include "base_op_builder.h" - namespace onnxruntime { namespace qnn { diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/resize_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/resize_op_builder.cc index c62fca88b6ec2..5e173b7aff030 100644 --- a/onnxruntime/core/providers/qnn/builder/opbuilder/resize_op_builder.cc +++ b/onnxruntime/core/providers/qnn/builder/opbuilder/resize_op_builder.cc @@ -5,17 +5,10 @@ #include #include -#include "core/providers/common.h" -#include "core/providers/shared/utils/utils.h" -#include "core/framework/tensorprotoutils.h" -#include "core/providers/qnn/builder/qnn_model_wrapper.h" -#include "core/providers/qnn/builder/op_builder_factory.h" -#include "core/providers/cpu/tensor/slice_helper.h" -#include "core/providers/qnn/builder/op_builder_factory.h" -#include "core/common/safeint.h" - #include "core/providers/qnn/builder/opbuilder/base_op_builder.h" #include "core/providers/qnn/builder/qnn_utils.h" +#include "core/providers/qnn/builder/qnn_model_wrapper.h" +#include "core/providers/qnn/builder/op_builder_factory.h" namespace onnxruntime { namespace qnn { diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/simple_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/simple_op_builder.cc index a6c4203ad92e4..48c637cd2e951 100644 --- a/onnxruntime/core/providers/qnn/builder/opbuilder/simple_op_builder.cc +++ b/onnxruntime/core/providers/qnn/builder/opbuilder/simple_op_builder.cc @@ -1,16 +1,10 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. -#include "core/providers/common.h" -#include "core/providers/shared/utils/utils.h" -#include "core/framework/tensorprotoutils.h" +#include "core/providers/qnn/builder/opbuilder/base_op_builder.h" #include "core/providers/qnn/builder/qnn_model_wrapper.h" #include "core/providers/qnn/builder/op_builder_factory.h" #include "core/providers/qnn/builder/qnn_utils.h" -#include "core/common/safeint.h" -#include "core/util/qmath.h" - -#include "base_op_builder.h" namespace onnxruntime { namespace qnn { @@ -22,11 +16,6 @@ class SimpleOpBuilder : public BaseOpBuilder { ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(SimpleOpBuilder); protected: - Status ProcessInputs(QnnModelWrapper& qnn_model_wrapper, - const NodeUnit& node_unit, - const logging::Logger& logger, - std::vector& input_names, - bool do_op_validation) const override ORT_MUST_USE_RESULT; Status ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_wrapper, const NodeUnit& node_unit, std::vector&& input_names, @@ -53,91 +42,6 @@ class SimpleOpBuilder : public BaseOpBuilder { static constexpr std::array gridsample_supported_padding_modes = {"zeros", "border", "reflection"}; }; -// Move to qnn_utils if it's re-usable -Status InsertConvertOp(QnnModelWrapper& qnn_model_wrapper, - const std::string& convert_input_name, - const std::string& convert_output_name, - Qnn_DataType_t input_qnn_data_type, - Qnn_DataType_t output_qnn_data_type, - int32_t input_offset, - float input_scale, - const std::vector& output_shape, - bool do_op_validation) { - // Assume input is already handled. 
- float qmin = 0.0f; - float qmax = 255.0f; - ORT_RETURN_IF_ERROR(qnn::utils::GetQminQmax(input_qnn_data_type, qmin, qmax)); - double value_min = qnn::utils::Dequantize(input_offset, input_scale, qmin); - double value_max = qnn::utils::Dequantize(input_offset, input_scale, qmax); - float scale = 0.0f; - int32_t offset = 0; - ORT_RETURN_IF_ERROR(qnn::utils::GetQuantParams(static_cast(value_min), - static_cast(value_max), - output_qnn_data_type, - scale, - offset)); - - std::vector output_shape_copy = output_shape; - QnnTensorWrapper convert_output_tensorwrapper(convert_output_name, - QNN_TENSOR_TYPE_NATIVE, - output_qnn_data_type, - QnnQuantParamsWrapper(scale, offset), - std::move(output_shape_copy)); - ORT_RETURN_IF_NOT(qnn_model_wrapper.AddTensorWrapper(std::move(convert_output_tensorwrapper)), "Failed to add tensor."); - - ORT_RETURN_IF_NOT(qnn_model_wrapper.CreateQnnNode(convert_output_name, - QNN_OP_PACKAGE_NAME_QTI_AISW, - "Convert", - {convert_input_name}, - {convert_output_name}, - {}, - do_op_validation), - "Failed to add node."); - return Status::OK(); -} - -Status SimpleOpBuilder::ProcessInputs(QnnModelWrapper& qnn_model_wrapper, - const NodeUnit& node_unit, - const logging::Logger& logger, - std::vector& input_names, - bool do_op_validation) const { - const std::string& op_type = node_unit.OpType(); - ORT_RETURN_IF_ERROR(BaseOpBuilder::ProcessInputs(qnn_model_wrapper, node_unit, logger, input_names, do_op_validation)); - - if (op_type == "MatMul") { - const auto& inputs = node_unit.Inputs(); - TensorInfo input0_info = {}; - TensorInfo input1_info = {}; - ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetTensorInfo(inputs[0], input0_info)); - ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetTensorInfo(inputs[1], input1_info)); - // Need to insert Convert op if both inputs are dynamic inputs and are ufixed_16 - if (!input0_info.is_initializer && !input1_info.is_initializer && - input0_info.qnn_data_type == input1_info.qnn_data_type && - input0_info.qnn_data_type == QNN_DATATYPE_UFIXED_POINT_16) { - ORT_RETURN_IF_NOT(input1_info.quant_param.IsPerTensor(), - "MatMul's activation inputs only support per-tensor quantization"); - const Qnn_QuantizeParams_t& quant_param = input1_info.quant_param.Get(); - // insert Convert op after input1 - std::string convert_input_name = input_names.back(); - input_names.pop_back(); - const std::string& matmul_output_name = node_unit.Outputs()[0].node_arg.Name(); - std::string convert_output_name = convert_input_name + "_convert_" + matmul_output_name; - ORT_RETURN_IF_ERROR(InsertConvertOp(qnn_model_wrapper, - convert_input_name, - convert_output_name, - input1_info.qnn_data_type, - QNN_DATATYPE_UFIXED_POINT_8, - quant_param.scaleOffsetEncoding.offset, - quant_param.scaleOffsetEncoding.scale, - input1_info.shape, - do_op_validation)); - input_names.push_back(convert_output_name); - } - } - - return Status::OK(); -} - Status SimpleOpBuilder::ExplicitOpCheck(QnnModelWrapper& qnn_model_wrapper, const NodeUnit& node_unit) const { const std::string& op_type = node_unit.OpType(); @@ -260,15 +164,16 @@ Status ProcessAlphaAttributeAsInput(QnnModelWrapper& qnn_model_wrapper, // Check LeakyRelu input 0 to see if it's quantized tensor bool is_quantized_tensor = node_unit.Outputs()[0].quant_param.has_value(); if (is_quantized_tensor) { - float scale; - uint8_t zero_point; - int64_t num_of_elements = 1; - concurrency::ThreadPool* thread_pool = nullptr; - GetQuantizationParameter(&tensor_data.alpha, num_of_elements, scale, zero_point, thread_pool); - unpacked_data.resize(1); - 
ParQuantizeLinearStd(&tensor_data.alpha, unpacked_data.data(), num_of_elements, scale, zero_point, thread_pool); - quantize_param = QnnQuantParamsWrapper(scale, static_cast(zero_point)); qnn_data_type = QNN_DATATYPE_UFIXED_POINT_8; + std::array scales = {1.0f}; + std::array offsets = {0}; + std::array shape = {1}; + auto float_data = gsl::make_span(&tensor_data.alpha, 1); + ORT_RETURN_IF_ERROR(qnn::utils::GetDataQuantParams(float_data, shape, scales, offsets, qnn_data_type)); + + unpacked_data.resize(1); + ORT_RETURN_IF_ERROR(qnn::utils::QuantizeData(float_data, shape, scales, offsets, unpacked_data, qnn_data_type)); + quantize_param = QnnQuantParamsWrapper(scales[0], static_cast(offsets[0])); } else { const auto& inputs = node_unit.Inputs(); TensorInfo input_info = {}; @@ -352,6 +257,22 @@ Status SimpleOpBuilder::ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_w if (node_unit.Domain() != kMSInternalNHWCDomain && (op_type == "DepthToSpace" || op_type == "SpaceToDepth" || op_type == "GridSample")) { return Status::OK(); } + +#if QNN_API_VERSION_MAJOR >= 2 && QNN_API_VERSION_MINOR >= 21 && QNN_API_VERSION_MINOR <= 23 + // Skip QNN validation for Tanh with uint16 (quantized) output. + // This gets around a Tanh QNN validation bug in QNN SDK 2.28.0 - 2.30.0. + // The QNN documentation states that the output scale and offset for ufixed_point_16 should be + // (1/32768) and -32768, respectively. However, the QNN validator incorrectly rejects these values. + if (op_type == "Tanh") { + TensorInfo output_info = {}; + ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetTensorInfo(node_unit.Outputs()[0], output_info)); + if (output_info.qnn_data_type == QNN_DATATYPE_UFIXED_POINT_16) { + LOGS(logger, INFO) << "Skipping QNN validation for Tanh node '" + << node_unit.Name() << "' with quantized uint16 output."; + return Status::OK(); + } + } +#endif } std::vector param_tensor_names; @@ -378,19 +299,6 @@ ORT_RETURN_IF(norm_p_order != 2, "QNN EP only supports LpNormalization with 'p' attribute equal to 2."); } - if (op_type == "MatMul") { - Qnn_Scalar_t scalar_param = QNN_SCALAR_INIT; - scalar_param.dataType = QNN_DATATYPE_BOOL_8; - scalar_param.bool8Value = 0; - QnnParamWrapper transpose_in0_param(node_unit.Index(), node_unit.Name(), QNN_OP_MAT_MUL_PARAM_TRANSPOSE_IN0, scalar_param); - param_tensor_names.push_back(transpose_in0_param.GetParamTensorName()); - qnn_model_wrapper.AddParamWrapper(std::move(transpose_in0_param)); - - QnnParamWrapper transpose_in1_param(node_unit.Index(), node_unit.Name(), QNN_OP_MAT_MUL_PARAM_TRANSPOSE_IN1, scalar_param); - param_tensor_names.push_back(transpose_in1_param.GetParamTensorName()); - qnn_model_wrapper.AddParamWrapper(std::move(transpose_in1_param)); - } - if (op_type == "LeakyRelu") { std::string input_name = "alpha"; ORT_RETURN_IF_ERROR(ProcessAlphaAttributeAsInput(qnn_model_wrapper, node_unit, input_name)); diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/slice_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/slice_op_builder.cc index b033c8723ea86..fcc7d27c3ada4 100644 --- a/onnxruntime/core/providers/qnn/builder/opbuilder/slice_op_builder.cc +++ b/onnxruntime/core/providers/qnn/builder/opbuilder/slice_op_builder.cc @@ -1,17 +1,12 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License.
-#include "core/providers/common.h" -#include "core/providers/shared/utils/utils.h" +#include "core/providers/qnn/builder/opbuilder/base_op_builder.h" #include "core/providers/qnn/builder/qnn_model_wrapper.h" #include "core/providers/qnn/builder/op_builder_factory.h" #include "core/providers/qnn/builder/qnn_utils.h" #include "core/providers/cpu/tensor/slice_helper.h" -#include "core/framework/tensorprotoutils.h" - -#include "base_op_builder.h" - namespace onnxruntime { namespace qnn { @@ -86,26 +81,22 @@ static Status GetInitializerInputData(const NodeUnitIODef& input, const QnnModel ORT_RETURN_IF_NOT(initializer_proto->has_data_type(), "Expected initializer ", input_name.c_str(), " to have a proto data type."); - // Create empty Tensor. - const auto* dtype = DataTypeImpl::TensorTypeFromONNXEnum(initializer_proto->data_type())->GetElementType(); - TensorShape shape = onnxruntime::utils::GetTensorShapeFromTensorProto(*initializer_proto); - Tensor tensor(dtype, shape, std::make_shared()); - - // Deserialize initializer into Tensor. - ORT_RETURN_IF_ERROR(onnxruntime::utils::TensorProtoToTensor( - onnxruntime::Env::Default(), qnn_model_wrapper.GetGraphViewer().ModelPath(), *initializer_proto, tensor)); + // Deserialize initializer into byte buffer + std::vector initializer_bytes; + ORT_RETURN_IF_ERROR(qnn_model_wrapper.UnpackInitializerData(*initializer_proto, initializer_bytes)); Status status; // Copy Tensor of int32_t or int64_t elems into output (int64_ts). - if (tensor.IsDataType()) { - gsl::span tensor_elems = tensor.DataAsSpan(); + auto onnx_type = static_cast(initializer_proto->data_type()); + if (onnx_type == ONNX_NAMESPACE::TensorProto_DataType_INT64) { + gsl::span tensor_elems = ReinterpretAsSpan(initializer_bytes); output.insert(output.end(), tensor_elems.begin(), tensor_elems.end()); - } else if (tensor.IsDataType()) { - gsl::span tensor_elems = tensor.DataAsSpan(); + } else if (onnx_type == ONNX_NAMESPACE::TensorProto_DataType_INT32) { + gsl::span tensor_elems = ReinterpretAsSpan(initializer_bytes); output.insert(output.end(), tensor_elems.begin(), tensor_elems.end()); } else { - status = ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Data type ", DataTypeImpl::ToString(dtype), + status = ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Data type ", onnx_type, " is not supported for Slice initializer input ", input.node_arg.Name().c_str()); } diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/softmax_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/softmax_op_builder.cc index b62534bacf426..7326523737383 100644 --- a/onnxruntime/core/providers/qnn/builder/opbuilder/softmax_op_builder.cc +++ b/onnxruntime/core/providers/qnn/builder/opbuilder/softmax_op_builder.cc @@ -1,15 +1,10 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. 
-#include "core/providers/common.h" -#include "core/providers/shared/utils/utils.h" -#include "core/framework/tensorprotoutils.h" +#include "core/providers/qnn/builder/opbuilder/base_op_builder.h" #include "core/providers/qnn/builder/qnn_utils.h" #include "core/providers/qnn/builder/qnn_model_wrapper.h" #include "core/providers/qnn/builder/op_builder_factory.h" -#include "core/common/safeint.h" - -#include "base_op_builder.h" namespace onnxruntime { namespace qnn { diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/split_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/split_op_builder.cc index ba5ad2cf03cef..1db9a8f1e3e15 100644 --- a/onnxruntime/core/providers/qnn/builder/opbuilder/split_op_builder.cc +++ b/onnxruntime/core/providers/qnn/builder/opbuilder/split_op_builder.cc @@ -1,16 +1,11 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. -#include "core/providers/common.h" -#include "core/providers/shared/utils/utils.h" +#include "core/providers/qnn/builder/opbuilder/base_op_builder.h" #include "core/providers/qnn/builder/qnn_utils.h" #include "core/providers/qnn/builder/qnn_model_wrapper.h" #include "core/providers/qnn/builder/op_builder_factory.h" #include "core/providers/cpu/tensor/slice_helper.h" -#include "core/providers/qnn/builder/op_builder_factory.h" -#include "core/common/safeint.h" - -#include "core/providers/qnn/builder/opbuilder/base_op_builder.h" namespace onnxruntime { namespace qnn { diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/tile_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/tile_op_builder.cc index 851ca84dce075..1d518c3ed5359 100644 --- a/onnxruntime/core/providers/qnn/builder/opbuilder/tile_op_builder.cc +++ b/onnxruntime/core/providers/qnn/builder/opbuilder/tile_op_builder.cc @@ -1,16 +1,11 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. -#include "core/providers/common.h" -#include "core/providers/shared/utils/utils.h" +#include "core/providers/qnn/builder/opbuilder/base_op_builder.h" #include "core/providers/qnn/builder/qnn_utils.h" #include "core/providers/qnn/builder/qnn_model_wrapper.h" #include "core/providers/qnn/builder/op_builder_factory.h" #include "core/providers/cpu/tensor/slice_helper.h" -#include "core/providers/qnn/builder/op_builder_factory.h" -#include "core/common/safeint.h" - -#include "core/providers/qnn/builder/opbuilder/base_op_builder.h" namespace onnxruntime { namespace qnn { diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/topk.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/topk.cc index d22c0811682d0..adaa13912ae50 100644 --- a/onnxruntime/core/providers/qnn/builder/opbuilder/topk.cc +++ b/onnxruntime/core/providers/qnn/builder/opbuilder/topk.cc @@ -1,8 +1,8 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. 
#include "core/providers/qnn/builder/opbuilder/base_op_builder.h" -#include "core/framework/utils.h" #include "core/providers/qnn/builder/op_builder_factory.h" +#include "core/providers/qnn/builder/qnn_utils.h" namespace onnxruntime { namespace qnn { const int TOPK_MIN_INPUT = 2; diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/transpose_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/transpose_op_builder.cc index a42d7312f0203..bcd8a6d0f78f6 100644 --- a/onnxruntime/core/providers/qnn/builder/opbuilder/transpose_op_builder.cc +++ b/onnxruntime/core/providers/qnn/builder/opbuilder/transpose_op_builder.cc @@ -4,12 +4,11 @@ #include #include +#include "core/providers/qnn/ort_api.h" +#include "core/providers/qnn/builder/opbuilder/base_op_builder.h" #include "core/providers/qnn/builder/qnn_model_wrapper.h" #include "core/providers/qnn/builder/op_builder_factory.h" #include "core/providers/qnn/builder/qnn_utils.h" -#include "core/common/safeint.h" - -#include "base_op_builder.h" namespace onnxruntime { namespace qnn { diff --git a/onnxruntime/core/providers/qnn/builder/qnn_backend_manager.cc b/onnxruntime/core/providers/qnn/builder/qnn_backend_manager.cc index 94374ca91aefe..8df4e5bb3ba39 100644 --- a/onnxruntime/core/providers/qnn/builder/qnn_backend_manager.cc +++ b/onnxruntime/core/providers/qnn/builder/qnn_backend_manager.cc @@ -7,25 +7,23 @@ #include #include #include "QnnOpDef.h" -#include "HTP/QnnHtpPerfInfrastructure.h" -#include "HTP/QnnHtpSystemContext.h" #include "CPU/QnnCpuCommon.h" // TODO: not exist for Windows yet // #include "GPU/QnnGpuCommon.h" #include "DSP/QnnDspCommon.h" #include "HTP/QnnHtpCommon.h" #include "HTP/QnnHtpContext.h" +#include "HTP/QnnHtpPerfInfrastructure.h" +#include "HTP/QnnHtpSystemContext.h" #include "Saver/QnnSaver.h" #include -#include "core/framework/endian_utils.h" -#include "core/common/logging/capture.h" + +#include "core/providers/qnn/ort_api.h" +#include "core/providers/qnn/qnn_allocator.h" +#include "core/providers/qnn/qnn_telemetry.h" #include "core/providers/qnn/builder/onnx_ctx_model_helper.h" #include "core/providers/qnn/builder/qnn_configs_helper.h" - -#ifdef _WIN32 -#include -#include "core/platform/tracing.h" -#endif +#include "core/providers/qnn/builder/qnn_utils.h" // Flag to determine if Backend should do node validation for each opNode added #define DO_GRAPH_NODE_VALIDATIONS 1 @@ -46,6 +44,14 @@ static Qnn_Version_t GetQnnInterfaceApiVersion(const QnnSystemInterface_t* qnn_i return qnn_interface->systemApiVersion; } +static const char* DlError() { +#ifdef _WIN32 + return ""; +#else + return ::dlerror(); +#endif +} + template Status QnnBackendManager::GetQnnInterfaceProvider(const char* lib_path, const char* interface_provider_name, @@ -241,17 +247,23 @@ void QnnLogging(const char* format, ORT_UNUSED_PARAMETER(level); ORT_UNUSED_PARAMETER(timestamp); + if (!::onnxruntime::logging::LoggingManager::HasDefaultLogger()) { + // QNN may call this logging callback at any point, which means that we need to explicitly check + // that the default logger has been initialized before trying to use it (otherwise get segfault). 
+ return; + } + const auto& logger = ::onnxruntime::logging::LoggingManager::DefaultLogger(); const auto severity = ::onnxruntime::logging::Severity::kVERBOSE; const auto data_type = ::onnxruntime::logging::DataType::SYSTEM; if (logger.OutputIsEnabled(severity, data_type)) { - ::onnxruntime::logging::Capture(logger, - severity, - ::onnxruntime::logging::Category::onnxruntime, - data_type, - ORT_WHERE) - .ProcessPrintf(format, argument_parameter); + auto log_capture = Factory::Create(logger, + severity, + logging::Category::onnxruntime, + data_type, + ORT_WHERE); + log_capture->ProcessPrintf(format, argument_parameter); } } @@ -263,6 +275,9 @@ Status QnnBackendManager::InitializeQnnLog(const logging::Logger& logger) { QnnLog_Level_t qnn_log_level = MapOrtSeverityToQNNLogLevel(ort_log_level); LOGS(*logger_, VERBOSE) << "Set Qnn log level: " << qnn_log_level; + // NOTE: Even if logCreate() fails and QNN does not return a valid log_handle_, QNN may still + // call the QnnLogging() callback. So, we have to make sure that QnnLogging() can handle calls + // in which ORT logging is not available. Qnn_ErrorHandle_t result = qnn_interface_.logCreate(QnnLogging, qnn_log_level, &log_handle_); if (result != QNN_SUCCESS) { @@ -389,25 +404,25 @@ Status QnnBackendManager::CreateDevice() { // Set SoC Model. The *enum* Qnn_SocModel_t is deprecated and will not be updated in the future. Therefore, // must use the latest SDK documentation to get the SoC model of the latest HW. if (soc_model_ != QNN_SOC_MODEL_UNKNOWN) { - QnnHtpDevice_CustomConfig_t& custom_config = device_configs_builder.PushCustomConfig(); - custom_config.option = QNN_HTP_DEVICE_CONFIG_OPTION_SOC; - custom_config.socModel = soc_model_; + gsl::not_null custom_config = device_configs_builder.PushCustomConfig(); + custom_config->option = QNN_HTP_DEVICE_CONFIG_OPTION_SOC; + custom_config->socModel = soc_model_; - QnnDevice_Config_t& device_config = device_configs_builder.PushConfig(); - device_config.option = QNN_DEVICE_CONFIG_OPTION_CUSTOM; - device_config.customConfig = &custom_config; + gsl::not_null device_config = device_configs_builder.PushConfig(); + device_config->option = QNN_DEVICE_CONFIG_OPTION_CUSTOM; + device_config->customConfig = custom_config; } // Set the minimum HTP architecture. The driver will use ops that are compatible with this minimum architecture. 
if (htp_arch_ != QNN_HTP_DEVICE_ARCH_NONE) { - QnnHtpDevice_CustomConfig_t& custom_config = device_configs_builder.PushCustomConfig(); - custom_config.option = QNN_HTP_DEVICE_CONFIG_OPTION_ARCH; - custom_config.arch.arch = htp_arch_; - custom_config.arch.deviceId = device_id_; - - QnnDevice_Config_t& device_config = device_configs_builder.PushConfig(); - device_config.option = QNN_DEVICE_CONFIG_OPTION_CUSTOM; - device_config.customConfig = &custom_config; + gsl::not_null custom_config = device_configs_builder.PushCustomConfig(); + custom_config->option = QNN_HTP_DEVICE_CONFIG_OPTION_ARCH; + custom_config->arch.arch = htp_arch_; + custom_config->arch.deviceId = device_id_; + + gsl::not_null device_config = device_configs_builder.PushConfig(); + device_config->option = QNN_DEVICE_CONFIG_OPTION_CUSTOM; + device_config->customConfig = custom_config; } } @@ -536,19 +551,22 @@ Status QnnBackendManager::CreateContext() { QnnContext_Config_t context_priority_config = QNN_CONTEXT_CONFIG_INIT; ORT_RETURN_IF_ERROR(SetQnnContextConfig(context_priority_, context_priority_config)); - const QnnContext_Config_t* context_configs[] = {&context_priority_config, - &context_config_weight_sharing, - nullptr}; + const QnnContext_Config_t* npu_context_configs[] = {&context_priority_config, + &context_config_weight_sharing, + nullptr}; + const QnnContext_Config_t* empty_context_configs[] = {nullptr}; + bool is_npu_backend = IsNpuBackend(GetQnnBackendType()); Qnn_ContextHandle_t context = nullptr; Qnn_ErrorHandle_t result = qnn_interface_.contextCreate(backend_handle_, device_handle_, - context_configs, + is_npu_backend ? npu_context_configs : empty_context_configs, &context); - contexts_.push_back(context); ORT_RETURN_IF(QNN_CONTEXT_NO_ERROR != result, "Failed to create context. Error: ", QnnErrorHandleToString(result)); + ORT_RETURN_IF_ERROR(AddQnnContextHandle(context)); + context_created_ = true; return Status::OK(); } @@ -558,14 +576,9 @@ Status QnnBackendManager::ReleaseContext() { return Status::OK(); } - bool failed = false; - for (auto context : contexts_) { - Qnn_ErrorHandle_t result = qnn_interface_.contextFree(context, nullptr); - if (QNN_CONTEXT_NO_ERROR != result) { - failed = true; - } - } - ORT_RETURN_IF(failed, "Failed to release context."); + // release QNN context handles + contexts_.clear(); + context_map_.clear(); context_created_ = false; return Status::OK(); @@ -766,7 +779,7 @@ Status QnnBackendManager::LoadCachedQnnContextFromBuffer(char* buffer, uint64_t &context, profile_backend_handle_); ORT_RETURN_IF(QNN_SUCCESS != rt, "Failed to create context from binary. Error code: ", rt); - contexts_.push_back(context); + ORT_RETURN_IF_ERROR(AddQnnContextHandle(context)); if (1 == graph_count) { // in case the EPContext node is generated from script // the graph name from the context binary may not match the EPContext node name @@ -871,6 +884,10 @@ Status QnnBackendManager::SetupBackend(const logging::Logger& logger, } Status QnnBackendManager::CreateHtpPowerCfgId(uint32_t device_id, uint32_t core_id, uint32_t& htp_power_config_id) { + // This function is called in QNN EP's OnRunStart() even if QNN backend setup failed and the model is assigned + // to a different EP. Therefore, we have to check that backend setup actually completed before trying to + // create an HTP power config ID. Otherwise, this causes a segfault because the QNN backend lib is unloaded. 
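+ // Returning a Status error here lets the caller fail gracefully instead of crashing.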
+ ORT_RETURN_IF_NOT(backend_setup_completed_, "Cannot create HTP power config ID if backend setup is not complete."); QnnDevice_Infrastructure_t qnn_device_infra = nullptr; auto status = qnn_interface_.deviceGetInfrastructure(&qnn_device_infra); ORT_RETURN_IF(QNN_SUCCESS != status, "backendGetPerfInfrastructure failed."); @@ -888,6 +905,10 @@ Status QnnBackendManager::CreateHtpPowerCfgId(uint32_t device_id, uint32_t core_ Status QnnBackendManager::SetHtpPowerConfig(uint32_t htp_power_config_client_id, HtpPerformanceMode htp_performance_mode) { + // This function is called in QNN EP's OnRunStart() even if QNN backend setup failed and the model is assigned + // to a different EP. Therefore, we have to check that backend setup actually completed before trying to + // set an HTP power config ID. Otherwise, this causes a segfault because the QNN backend lib is unloaded. + ORT_RETURN_IF_NOT(backend_setup_completed_, "Cannot set HTP power config ID if backend setup is not complete."); QnnDevice_Infrastructure_t qnn_device_infra = nullptr; auto status = qnn_interface_.deviceGetInfrastructure(&qnn_device_infra); ORT_RETURN_IF(QNN_SUCCESS != status, "backendGetPerfInfrastructure failed."); @@ -1029,6 +1050,10 @@ Status QnnBackendManager::SetHtpPowerConfig(uint32_t htp_power_config_client_id, Status QnnBackendManager::SetRpcControlLatency(uint32_t htp_power_config_client_id, uint32_t rpc_control_latency) { + // This function is called in QNN EP's OnRunStart() even if QNN backend setup failed and the model is assigned + // to a different EP. Therefore, we have to check that backend setup actually completed before trying to + // set RPC control latency. Otherwise, this causes a segfault because the QNN backend library is unloaded. + ORT_RETURN_IF_NOT(backend_setup_completed_, "Cannot set HTP RPC control latency if backend setup is not complete."); if (rpc_control_latency != 0) { QnnDevice_Infrastructure_t qnn_device_infra = nullptr; auto status = qnn_interface_.deviceGetInfrastructure(&qnn_device_infra); @@ -1091,39 +1116,35 @@ Status QnnBackendManager::TerminateQnnLog() { } void QnnBackendManager::ReleaseResources() { - if (!backend_setup_completed_) { - return; - } - auto result = ReleaseContext(); if (Status::OK() != result) { - LOGS_DEFAULT(ERROR) << "Failed to ReleaseContext."; + LOGS_DEFAULT(ERROR) << "Failed to ReleaseContext: " << result.ErrorMessage(); } result = ReleaseProfilehandle(); if (Status::OK() != result) { - LOGS_DEFAULT(ERROR) << "Failed to ReleaseProfilehandle."; + LOGS_DEFAULT(ERROR) << "Failed to ReleaseProfilehandle: " << result.ErrorMessage(); } result = ReleaseDevice(); if (Status::OK() != result) { - LOGS_DEFAULT(ERROR) << "Failed to ReleaseDevice."; + LOGS_DEFAULT(ERROR) << "Failed to ReleaseDevice: " << result.ErrorMessage(); } result = ShutdownBackend(); if (Status::OK() != result) { - LOGS_DEFAULT(ERROR) << "Failed to ShutdownBackend."; + LOGS_DEFAULT(ERROR) << "Failed to ShutdownBackend: " << result.ErrorMessage(); } result = TerminateQnnLog(); if (Status::OK() != result) { - LOGS_DEFAULT(ERROR) << "Failed to TerminateQnnLog."; + LOGS_DEFAULT(ERROR) << "Failed to TerminateQnnLog: " << result.ErrorMessage(); } if (backend_lib_handle_) { result = UnloadLib(backend_lib_handle_); if (Status::OK() != result) { - LOGS_DEFAULT(ERROR) << "Failed to unload backend library."; + LOGS_DEFAULT(ERROR) << "Failed to unload backend library: " << result.ErrorMessage(); } } @@ -1138,15 +1159,16 @@ Status QnnBackendManager::ExtractBackendProfilingInfo() { } bool 
tracelogging_provider_ep_enabled = false; - const Env& env = Env::Default(); - auto& provider = env.GetTelemetryProvider(); - auto level = provider.Level(); +#ifdef _WIN32 + auto& provider = QnnTelemetry::Instance(); if (provider.IsEnabled()) { + auto level = provider.Level(); auto keyword = provider.Keyword(); if ((keyword & static_cast(onnxruntime::logging::ORTTraceLoggingKeyword::Profiling)) != 0 && level >= 5) { tracelogging_provider_ep_enabled = true; } } +#endif // defined(_WIN32) // ETW disabled previously, but enabled now if (ProfilingLevel::INVALID == profiling_level_etw_ && tracelogging_provider_ep_enabled) { @@ -1364,18 +1386,8 @@ void QnnBackendManager::LogQnnProfileEventAsTraceLogging( const std::string& timingSource, const std::string& eventLevel, const char* eventIdentifier) { - TraceLoggingWrite( - telemetry_provider_handle, - "QNNProfilingEvent", - TraceLoggingKeyword(static_cast(onnxruntime::logging::ORTTraceLoggingKeyword::Profiling)), - TraceLoggingLevel(WINEVENT_LEVEL_VERBOSE), - TraceLoggingValue(timestamp, "Timestamp"), - TraceLoggingString(message.c_str(), "Message"), - TraceLoggingString(qnnScalarValue.c_str(), "Value"), - TraceLoggingString(unit.c_str(), "Unit of Measurement"), - TraceLoggingString(timingSource.c_str(), "Timing Source"), - TraceLoggingString(eventLevel.c_str(), "Event Level"), - TraceLoggingString(eventIdentifier, "Event Identifier")); + QnnTelemetry& qnn_telemetry = QnnTelemetry::Instance(); + qnn_telemetry.LogQnnProfileEvent(timestamp, message, qnnScalarValue, unit, timingSource, eventLevel, eventIdentifier); } #endif @@ -1452,13 +1464,8 @@ const char* QnnBackendManager::QnnProfileErrorToString(QnnProfile_Error_t error) } } -const char* QnnBackendManager::QnnErrorHandleToString(Qnn_ErrorHandle_t error) { - // From QNN SDK: The memory is statically owned and should not be freed by the caller. - const char* error_msg = nullptr; - if (QNN_SUCCESS == qnn_interface_.errorGetMessage(error, &error_msg)) { - return error_msg; - } - return "Unknown"; +std::string_view QnnBackendManager::QnnErrorHandleToString(Qnn_ErrorHandle_t error) { + return utils::GetQnnErrorMessage(qnn_interface_, error); } const std::string QnnBackendManager::ExtractQnnScalarValue(const Qnn_Scalar_t& scalar) { @@ -1532,7 +1539,8 @@ void* QnnBackendManager::LoadLib(const char* file_name, int flags, std::string& auto file_path = std::filesystem::path(file_name); if (!file_path.is_absolute()) { // construct an absolute path from ORT runtime path + file_name and check whether it exists. - auto pathstring = Env::Default().GetRuntimePath() + ToPathString(file_name); + const Env& env = GetDefaultEnv(); + auto pathstring = env.GetRuntimePath() + ToPathString(file_name); auto absolute_path = pathstring.c_str(); if (std::filesystem::exists(std::filesystem::path(absolute_path))) { // load library from absolute path and search for dependencies there. 
@@ -1691,5 +1699,90 @@ void* QnnBackendManager::LibFunction(void* handle, const char* symbol, std::stri #endif } +Status QnnBackendManager::AddQnnContextHandle(Qnn_ContextHandle_t raw_context_handle) { + ORT_RETURN_IF(logger_ == nullptr, "logger_ should be set."); + + auto free_context_handle = [this, &logger = *logger_](Qnn_ContextHandle_t raw_context_handle) { + const auto free_result = qnn_interface_.contextFree(raw_context_handle, nullptr); + if (free_result != QNN_CONTEXT_NO_ERROR) { + LOGS(logger, ERROR) << "qnn_interface.contextFree() failed: " + << utils::GetVerboseQnnErrorMessage(qnn_interface_, free_result); + } + }; + + // take ownership of `raw_context_handle` + auto context_handle = UniqueQnnContextHandle(raw_context_handle, free_context_handle); + auto mem_handle_manager = std::make_unique(GetQnnInterface(), raw_context_handle, + *logger_); + + auto context_handle_record = std::make_shared(); + context_handle_record->context_handle = std::move(context_handle); + context_handle_record->mem_handles = std::move(mem_handle_manager); + + const bool inserted = context_map_.try_emplace(raw_context_handle, std::move(context_handle_record)).second; + ORT_RETURN_IF_NOT(inserted, "QNN context was already added: ", raw_context_handle); + + contexts_.push_back(raw_context_handle); + + return Status::OK(); +} + +Status QnnBackendManager::GetOrRegisterContextMemHandle(Qnn_ContextHandle_t context_handle, + void* shared_memory_address, + const Qnn_Tensor_t& qnn_tensor, + Qnn_MemHandle_t& mem_handle) { + // Multi-threading situations to consider: + // 1) Shared memory allocation is being freed in another thread while we are processing `shared_memory_address`. + // This implies incorrect usage as the memory is being freed while it is still in use. Let's assume this won't + // happen. + // 2) The shared memory allocation clean up function is being run from another thread while the + // QnnContextHandleRecord or QnnBackendManager objects are being destroyed. + // Usage of weak_ptrs from the clean up function should ensure that those objects are only accessed while they are + // in scope. + + const auto context_handle_record_it = context_map_.find(context_handle); + ORT_RETURN_IF_NOT(context_handle_record_it != context_map_.end(), "QNN context not found: ", context_handle); + + auto& context_handle_record = context_handle_record_it->second; + auto& context_mem_handle_manager = context_handle_record->mem_handles; + + bool did_register{}; + ORT_RETURN_IF_ERROR(context_mem_handle_manager->GetOrRegister(shared_memory_address, qnn_tensor, + mem_handle, did_register)); + + if (did_register) { + HtpSharedMemoryAllocator::AllocationCleanUpFn unregister_mem_handle = + [&logger = *logger_, + weak_backend_manager = weak_from_this(), + weak_context_handle_record = std::weak_ptr{context_handle_record}]( + void* shared_memory_address) { + // Lock QnnBackendManager shared_ptr to ensure that QNN interface is still valid. + auto backend_manager = weak_backend_manager.lock(); + if (!backend_manager) { + return; + } + + // Lock QnnContextHandleRecord shared_ptr to ensure that QNN context handle is still valid. 
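+ // If the record is already gone, the context and its registered mem handles were released with it;
+ // there is nothing left to unregister.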
+ auto context_handle_record = weak_context_handle_record.lock(); + if (!context_handle_record) { + return; + } + + auto& context_mem_handle_manager = context_handle_record->mem_handles; + + auto unregister_status = context_mem_handle_manager->Unregister(shared_memory_address); + if (!unregister_status.IsOK()) { + LOGS(logger, ERROR) << "Failed to unregister shared memory mem handle for address: " + << shared_memory_address << ", error: " << unregister_status.ErrorMessage(); + } + }; + + ORT_RETURN_IF_ERROR(HtpSharedMemoryAllocator::AddAllocationCleanUp(shared_memory_address, + std::move(unregister_mem_handle))); + } + + return Status::OK(); +} + } // namespace qnn } // namespace onnxruntime diff --git a/onnxruntime/core/providers/qnn/builder/qnn_backend_manager.h b/onnxruntime/core/providers/qnn/builder/qnn_backend_manager.h index 8222d54395bd9..4a69859a7e841 100644 --- a/onnxruntime/core/providers/qnn/builder/qnn_backend_manager.h +++ b/onnxruntime/core/providers/qnn/builder/qnn_backend_manager.h @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -21,9 +22,9 @@ #include "QnnLog.h" #include "QnnTypes.h" #include "System/QnnSystemInterface.h" -#include "core/common/status.h" -#include "core/common/logging/logging.h" -#include "core/common/path_string.h" + +#include "core/providers/qnn/ort_api.h" +#include "core/providers/qnn/builder/qnn_context_mem_handle_manager.h" #include "core/providers/qnn/builder/qnn_def.h" namespace onnxruntime { @@ -31,63 +32,48 @@ namespace qnn { class QnnModel; -class QnnBackendManager { +// configuration values for QnnBackendManager creation +struct QnnBackendManagerConfig { + std::string backend_path; + ProfilingLevel profiling_level_etw; + ProfilingLevel profiling_level; + std::string profiling_file_path; + ContextPriority context_priority; + std::string qnn_saver_path; + uint32_t device_id; + QnnHtpDevice_Arch_t htp_arch; + uint32_t soc_model; + bool enable_htp_weight_sharing; +}; + +class QnnBackendManager : public std::enable_shared_from_this<QnnBackendManager> { + private: + // private tag to pass to constructor to ensure that constructor cannot be directly called externally + struct PrivateConstructorTag {}; + public: - QnnBackendManager(std::string&& backend_path, - ProfilingLevel profiling_level_etw, - ProfilingLevel profiling_level, - std::string&& profiling_file_path, - ContextPriority context_priority, - std::string&& qnn_saver_path, - uint32_t device_id, - QnnHtpDevice_Arch_t htp_arch, - uint32_t soc_model, - bool enable_htp_weight_sharing) - : backend_path_(backend_path), - profiling_level_etw_(profiling_level_etw), - profiling_level_(profiling_level), - profiling_file_path_(profiling_file_path), - context_priority_(context_priority), - qnn_saver_path_(qnn_saver_path), - device_id_(device_id), - htp_arch_(htp_arch), - soc_model_(soc_model), - enable_htp_weight_sharing_(enable_htp_weight_sharing) { + static std::shared_ptr<QnnBackendManager> Create(const QnnBackendManagerConfig& config) { + return std::make_shared<QnnBackendManager>(config, PrivateConstructorTag{}); } - ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(QnnBackendManager); - ~QnnBackendManager(); - char* DlError() { -#ifdef _WIN32 - return ""; -#else - return ::dlerror(); -#endif + // Note: Creation should be done via Create(). This constructor is public so that it can be called from + // std::make_shared<QnnBackendManager>().
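+ // The private-tag idiom in miniature (hypothetical Widget, for illustration only):
+ //   class Widget {
+ //     struct Tag {};  // private: only Widget can name it
+ //    public:
+ //     static std::shared_ptr<Widget> Create() { return std::make_shared<Widget>(Tag{}); }
+ //     explicit Widget(Tag) {}  // public, but uncallable without a Tag
+ //   };
+ // std::make_shared requires a public constructor; the inaccessible tag type is what
+ // actually restricts construction to the Create() factory.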
+ QnnBackendManager(const QnnBackendManagerConfig& config, PrivateConstructorTag) + : backend_path_(config.backend_path), + profiling_level_etw_(config.profiling_level_etw), + profiling_level_(config.profiling_level), + profiling_file_path_(config.profiling_file_path), + context_priority_(config.context_priority), + qnn_saver_path_(config.qnn_saver_path), + device_id_(config.device_id), + htp_arch_(config.htp_arch), + soc_model_(config.soc_model), + enable_htp_weight_sharing_(config.enable_htp_weight_sharing) { } - Status LoadBackend(); - - Status InitializeBackend(); - - Status CreateDevice(); - - Status ReleaseDevice(); - - Status ShutdownBackend(); - - Status InitializeProfiling(); - - Status ReleaseProfilehandle(); - - Status CreateContext(); - - Status ReleaseContext(); - - Status ResetContext() { - ORT_RETURN_IF_ERROR(ReleaseContext()); + ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(QnnBackendManager); - return CreateContext(); - } + ~QnnBackendManager(); std::unique_ptr<unsigned char[]> GetContextBinaryBuffer(uint64_t& written_buffer_size); @@ -148,7 +134,31 @@ class QnnBackendManager { uint64_t buffer_length, uint64_t& max_spill_fill_buffer_size); + // Gets an existing QNN mem handle or registers a new one. + // `mem_handle` is set to the QNN mem handle. + Status GetOrRegisterContextMemHandle(Qnn_ContextHandle_t context, void* shared_memory_address, + const Qnn_Tensor_t& qnn_tensor, + Qnn_MemHandle_t& mem_handle); + private: + Status LoadBackend(); + + Status InitializeBackend(); + + Status CreateDevice(); + + Status ReleaseDevice(); + + Status ShutdownBackend(); + + Status InitializeProfiling(); + + Status ReleaseProfilehandle(); + + Status CreateContext(); + + Status ReleaseContext(); + // Sets the ORT logger and creates a corresponding QNN logger with the same log level. // NOTE: caller must lock the `logger_recursive_mutex_` before calling this function. Status InitializeQnnLog(const logging::Logger& logger); @@ -217,7 +227,7 @@ class QnnBackendManager { static const std::string GetEventTypeString(QnnProfile_EventType_t eventType); static const std::string ExtractQnnScalarValue(const Qnn_Scalar_t& scalar); const char* QnnProfileErrorToString(QnnProfile_Error_t error); - const char* QnnErrorHandleToString(Qnn_ErrorHandle_t error); + std::string_view QnnErrorHandleToString(Qnn_ErrorHandle_t error); QnnLog_Level_t MapOrtSeverityToQNNLogLevel(logging::Severity ort_log_level); #ifdef _WIN32 void LogQnnProfileEventAsTraceLogging( @@ -230,6 +240,21 @@ class QnnBackendManager { const char* eventIdentifier); #endif + // Adds a new QNN context. + // Transfers ownership of `context_handle` (i.e., responsibility of freeing it) to this instance. + Status AddQnnContextHandle(Qnn_ContextHandle_t context_handle); + + private: + // assume Qnn_ContextHandle_t is a pointer and able to be wrapped with std::unique_ptr + static_assert(std::is_pointer_v<Qnn_ContextHandle_t>); + using UniqueQnnContextHandle = + std::unique_ptr<std::remove_pointer_t<Qnn_ContextHandle_t>, std::function<void(Qnn_ContextHandle_t)>>; + + struct QnnContextHandleRecord { + UniqueQnnContextHandle context_handle; + std::unique_ptr<QnnContextMemHandleManager> mem_handles; + }; + private: const std::string backend_path_; std::recursive_mutex logger_recursive_mutex_; @@ -242,7 +267,16 @@ class QnnBackendManager { QnnBackend_Config_t** backend_config_ = nullptr; Qnn_LogHandle_t log_handle_ = nullptr; Qnn_DeviceHandle_t device_handle_ = nullptr; + + // Map of Qnn_ContextHandle_t to QnnContextHandleRecord. + // The QnnContextHandleRecord has ownership of the Qnn_ContextHandle_t.
+ // Note: Using shared_ptr so that we can refer to it with a weak_ptr from a + // HtpSharedMemoryAllocator allocation cleanup callback. + std::unordered_map<Qnn_ContextHandle_t, std::shared_ptr<QnnContextHandleRecord>> context_map_; + + // Vector of Qnn_ContextHandle_t. The context handles are owned by context_map_. std::vector<Qnn_ContextHandle_t> contexts_; + ProfilingLevel profiling_level_etw_; ProfilingLevel profiling_level_; ProfilingLevel profiling_level_merge_; diff --git a/onnxruntime/core/providers/qnn/builder/qnn_configs_helper.h b/onnxruntime/core/providers/qnn/builder/qnn_configs_helper.h index 9dd9bbaa08d64..b581cd90537d9 100644 --- a/onnxruntime/core/providers/qnn/builder/qnn_configs_helper.h +++ b/onnxruntime/core/providers/qnn/builder/qnn_configs_helper.h @@ -3,7 +3,8 @@ #pragma once -#include +#include +#include namespace onnxruntime { namespace qnn { @@ -49,9 +50,9 @@ class QnnConfigsBuilder { * * \return A reference to a default CustomConfigType object. */ - CustomConfigType& PushCustomConfig() { - custom_configs_.push_back(custom_config_init_); - return custom_configs_.back(); + gsl::not_null<CustomConfigType*> PushCustomConfig() { + custom_configs_.push_back(std::make_unique<CustomConfigType>(custom_config_init_)); + return custom_configs_.back().get(); } /** @@ -60,15 +61,15 @@ class QnnConfigsBuilder { * * \return A reference to a default BaseConfigType object. */ - BaseConfigType& PushConfig() { - configs_.push_back(base_config_init_); - BaseConfigType& config = configs_.back(); + gsl::not_null<BaseConfigType*> PushConfig() { + configs_.push_back(std::make_unique<BaseConfigType>(base_config_init_)); + BaseConfigType* config = configs_.back().get(); // Add pointer to this new config to the list of config pointers. if (IsNullTerminated()) { - config_ptrs_.back() = &config; // Replace last nullptr entry. + config_ptrs_.back() = config; // Replace last nullptr entry. } else { - config_ptrs_.push_back(&config); + config_ptrs_.push_back(config); } return config; @@ -81,9 +82,14 @@ class QnnConfigsBuilder { BaseConfigType base_config_init_; CustomConfigType custom_config_init_; - InlinedVector<CustomConfigType> custom_configs_; - InlinedVector<BaseConfigType> configs_; - InlinedVector<BaseConfigType*> config_ptrs_; + + // Store elements of unique_ptrs instead of by value because std::vector reallocation would change the + // location of elements in memory. BaseConfigType objects may contain pointers to CustomConfigType objects, + // so we need to make sure that pointers to these objects are stable in memory. + std::vector<std::unique_ptr<CustomConfigType>> custom_configs_; + std::vector<std::unique_ptr<BaseConfigType>> configs_; + + std::vector<BaseConfigType*> config_ptrs_; }; } // namespace qnn diff --git a/onnxruntime/core/providers/qnn/builder/qnn_context_mem_handle_manager.cc b/onnxruntime/core/providers/qnn/builder/qnn_context_mem_handle_manager.cc new file mode 100644 index 0000000000000..4d868c6ab96f6 --- /dev/null +++ b/onnxruntime/core/providers/qnn/builder/qnn_context_mem_handle_manager.cc @@ -0,0 +1,130 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License.
+ +#include "core/providers/qnn/builder/qnn_context_mem_handle_manager.h" + +#include "HTP/QnnHtpMem.h" + +#include "core/providers/qnn/ort_api.h" +#include "core/providers/qnn/builder/qnn_def.h" +#include "core/providers/qnn/builder/qnn_utils.h" +#include "core/providers/qnn/qnn_allocator.h" + +namespace onnxruntime::qnn { + +QnnContextMemHandleManager::QnnContextMemHandleManager(const QNN_INTERFACE_VER_TYPE& qnn_interface, + Qnn_ContextHandle_t context, + const logging::Logger& logger) + : qnn_interface_{qnn_interface}, + context_{context}, + logger_{logger} { +} + +QnnContextMemHandleManager::~QnnContextMemHandleManager() { + Clear(); +} + +Status QnnContextMemHandleManager::GetOrRegister(void* shared_memory_address, const Qnn_Tensor_t& qnn_tensor, + Qnn_MemHandle_t& qnn_mem_handle, bool& did_register) { + const auto qnn_tensor_rank = GetQnnTensorRank(qnn_tensor); + auto* const qnn_tensor_dims = GetQnnTensorDims(qnn_tensor); + const auto qnn_tensor_data_type = GetQnnTensorDataType(qnn_tensor); + + const size_t qnn_tensor_data_size = + utils::GetQnnTensorDataSizeInBytes(gsl::span{qnn_tensor_dims, size_t{qnn_tensor_rank}}, qnn_tensor_data_type); + + { + std::scoped_lock g{mem_handles_mutex_}; + + // find existing mem handle + if (const auto mem_handles_it = mem_handles_.find(shared_memory_address); + mem_handles_it != mem_handles_.end()) { + const auto& mem_handle_record = mem_handles_it->second; + + // check that actual tensor size is less than or equal to registered tensor size + ORT_RETURN_IF_NOT(qnn_tensor_data_size <= mem_handle_record.registered_tensor_data_size, + "Actual tensor data size (", qnn_tensor_data_size, + ") is larger than registered tensor data size (", mem_handle_record.registered_tensor_data_size, + ")."); + + qnn_mem_handle = mem_handle_record.mem_handle.get(); + did_register = false; + return Status::OK(); + } + + // register a new mem handle + HtpSharedMemoryAllocator::SharedMemoryInfo shared_memory_info{}; + ORT_RETURN_IF_ERROR(HtpSharedMemoryAllocator::GetAllocationSharedMemoryInfo(shared_memory_address, + shared_memory_info)); + + Qnn_MemDescriptor_t mem_descriptor = QNN_MEM_DESCRIPTOR_INIT; + mem_descriptor.memShape.dimSize = qnn_tensor_dims; + mem_descriptor.memShape.numDim = qnn_tensor_rank; + mem_descriptor.memShape.shapeConfig = nullptr; + mem_descriptor.dataType = qnn_tensor_data_type; + mem_descriptor.memType = QNN_MEM_TYPE_CUSTOM; + + QnnMemHtp_Descriptor_t htp_mem_descriptor{}; + htp_mem_descriptor.type = QNN_HTP_MEM_SHARED_BUFFER; + htp_mem_descriptor.size = shared_memory_info.total_size; + htp_mem_descriptor.sharedBufferConfig.fd = shared_memory_info.fd; + htp_mem_descriptor.sharedBufferConfig.offset = shared_memory_info.offset; + + mem_descriptor.customInfo = &htp_mem_descriptor; + + LOGS(logger_, VERBOSE) << "Registering QNN mem handle for context: " << context_ + << ", shared memory (address: " << shared_memory_address + << ", offset: " << shared_memory_info.offset + << ", fd: " << shared_memory_info.fd + << ")"; + + Qnn_MemHandle_t raw_mem_handle{}; + const auto register_result = qnn_interface_.memRegister(context_, &mem_descriptor, 1, &raw_mem_handle); + ORT_RETURN_IF_NOT(register_result == QNN_SUCCESS, + "qnn_interface.memRegister() failed: ", + utils::GetVerboseQnnErrorMessage(qnn_interface_, register_result)); + + LOGS(logger_, VERBOSE) << "Registered QNN mem handle. mem_handle: " << raw_mem_handle; + + // NOTE: Must use the default ORT logger inside this lambda. 
Don't capture this->logger_ because it may be deleted + // by the time we need to unregister all memory handles. This happens when this->logger_ is a session logger: + // ~InferenceSession() -> ~Logger() -> ~QnnExecutionProvider() -> ~QnnBackendManager() -> + // ~QnnContextMemHandleManager() -> unregister_mem_handle() segfault + const auto unregister_mem_handle = [&qnn_interface = this->qnn_interface_](Qnn_MemHandle_t raw_mem_handle) { + LOGS_DEFAULT(VERBOSE) << "Unregistering QNN mem handle. mem_handle: " << raw_mem_handle; + + const auto unregister_result = qnn_interface.memDeRegister(&raw_mem_handle, 1); + if (unregister_result != QNN_SUCCESS) { + LOGS_DEFAULT(ERROR) << "qnn_interface.memDeRegister() failed: " + << utils::GetVerboseQnnErrorMessage(qnn_interface, unregister_result); + } + }; + + UniqueQnnMemHandle mem_handle(raw_mem_handle, unregister_mem_handle); + MemHandleRecord mem_handle_record{qnn_tensor_data_size, std::move(mem_handle)}; + mem_handles_.emplace(shared_memory_address, std::move(mem_handle_record)); + + qnn_mem_handle = raw_mem_handle; + did_register = true; + return Status::OK(); + } +} + +Status QnnContextMemHandleManager::Unregister(void* shared_memory_address) { + std::scoped_lock g{mem_handles_mutex_}; + + auto mem_handles_it = mem_handles_.find(shared_memory_address); + ORT_RETURN_IF_NOT(mem_handles_it != mem_handles_.end(), + "No mem handle found for address (", shared_memory_address, ")."); + + mem_handles_.erase(mem_handles_it); + + return Status::OK(); +} + +void QnnContextMemHandleManager::Clear() { + std::scoped_lock g{mem_handles_mutex_}; + mem_handles_.clear(); +} + +} // namespace onnxruntime::qnn diff --git a/onnxruntime/core/providers/qnn/builder/qnn_context_mem_handle_manager.h b/onnxruntime/core/providers/qnn/builder/qnn_context_mem_handle_manager.h new file mode 100644 index 0000000000000..0dd8a8466d1cf --- /dev/null +++ b/onnxruntime/core/providers/qnn/builder/qnn_context_mem_handle_manager.h @@ -0,0 +1,58 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#pragma once + +#include +#include +#include + +#include "QnnInterface.h" + +#include "core/providers/qnn/ort_api.h" + +namespace onnxruntime::qnn { + +// This class manages QNN mem handles (Qnn_MemHandle_t) associated with a QNN context (Qnn_ContextHandle_t). +// In particular, it handles the registration and deregistration of mem handles. +// The associated QNN context is expected to be in scope for the lifetime of the QnnContextMemHandleManager. +class QnnContextMemHandleManager { + public: + QnnContextMemHandleManager(const QNN_INTERFACE_VER_TYPE& qnn_interface, Qnn_ContextHandle_t qnn_context, + const logging::Logger& logger); + + ~QnnContextMemHandleManager(); + + ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(QnnContextMemHandleManager); + + // Gets an existing QNN mem handle or registers a new one. + // `qnn_mem_handle` is set to the QNN mem handle and `did_register` is true if `qnn_mem_handle` was newly registered. 
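+ // Typical call pattern (a sketch; `manager`, `addr`, and `tensor` are hypothetical):
+ //   Qnn_MemHandle_t handle{};
+ //   bool did_register{};
+ //   ORT_RETURN_IF_ERROR(manager.GetOrRegister(addr, tensor, handle, did_register));
+ //   if (did_register) { /* arrange for Unregister(addr) when the allocation is freed */ }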
+ Status GetOrRegister(void* shared_memory_address, const Qnn_Tensor_t& qnn_tensor, + Qnn_MemHandle_t& qnn_mem_handle, bool& did_register); + + Status Unregister(void* shared_memory_address); + + void Clear(); + + private: + const QNN_INTERFACE_VER_TYPE& qnn_interface_; + Qnn_ContextHandle_t context_; + const logging::Logger& logger_; + + // assume Qnn_MemHandle_t is a pointer and able to be wrapped with std::unique_ptr + static_assert(std::is_pointer_v<Qnn_MemHandle_t>); + + using UniqueQnnMemHandle = + std::unique_ptr<std::remove_pointer_t<Qnn_MemHandle_t>, std::function<void(Qnn_MemHandle_t)>>; + + struct MemHandleRecord { + size_t registered_tensor_data_size; + UniqueQnnMemHandle mem_handle; + }; + + // shared memory address -> associated mem handle record + InlinedHashMap<void*, MemHandleRecord> mem_handles_; + std::mutex mem_handles_mutex_; // synchronize access to mem_handles_ +}; + +} // namespace onnxruntime::qnn diff --git a/onnxruntime/core/providers/qnn/builder/qnn_def.cc b/onnxruntime/core/providers/qnn/builder/qnn_def.cc index c0fc079979822..1a58d0d417a0b 100644 --- a/onnxruntime/core/providers/qnn/builder/qnn_def.cc +++ b/onnxruntime/core/providers/qnn/builder/qnn_def.cc @@ -208,6 +208,22 @@ void SetQnnTensorClientBufData(Qnn_Tensor_t& qnn_tensor, void* client_buf_data) ORT_THROW("QNN tensor version not supported, QNN tensor version: ", qnn_tensor.version); } +void SetQnnTensorMemHandle(Qnn_Tensor_t& qnn_tensor, Qnn_MemHandle_t mem_handle) { + if (QNN_TENSOR_VERSION_1 == qnn_tensor.version) { + qnn_tensor.v1.memHandle = mem_handle; + return; + } + +#ifdef QNN_TENSOR_V2_INIT + if (QNN_TENSOR_VERSION_2 == qnn_tensor.version) { + qnn_tensor.v2.memHandle = mem_handle; + return; + } +#endif // QNN_TENSOR_V2_INIT + + ORT_THROW("QNN tensor version not supported, QNN tensor version: ", qnn_tensor.version); +} + void SetQnnTensorQParams(Qnn_Tensor_t& qnn_tensor, const Qnn_QuantizeParams_t& quantize_params) { if (QNN_TENSOR_VERSION_1 == qnn_tensor.version) { qnn_tensor.v1.quantizeParams = quantize_params; @@ -350,6 +366,20 @@ const Qnn_ClientBuffer_t& GetQnnTensorClientBuf(const Qnn_Tensor_t& qnn_tensor) ORT_THROW("QNN tensor version not supported, QNN tensor version: ", qnn_tensor.version); } +Qnn_MemHandle_t GetQnnTensorMemHandle(const Qnn_Tensor_t& qnn_tensor) { + if (QNN_TENSOR_VERSION_1 == qnn_tensor.version) { + return qnn_tensor.v1.memHandle; + } + +#ifdef QNN_TENSOR_V2_INIT + if (QNN_TENSOR_VERSION_2 == qnn_tensor.version) { + return qnn_tensor.v2.memHandle; + } +#endif // QNN_TENSOR_V2_INIT + + ORT_THROW("QNN tensor version not supported, QNN tensor version: ", qnn_tensor.version); +} + const Qnn_QuantizeParams_t& GetQnnTensorQParams(const Qnn_Tensor_t& qnn_tensor) { if (QNN_TENSOR_VERSION_1 == qnn_tensor.version) { return qnn_tensor.v1.quantizeParams; @@ -364,6 +394,20 @@ const Qnn_QuantizeParams_t& GetQnnTensorQParams(const Qnn_Tensor_t& qnn_tensor) ORT_THROW("QNN tensor version not supported, QNN tensor version: ", qnn_tensor.version); } +uint8_t* GetQnnTensorIsDynamicDimensions(const Qnn_Tensor_t& qnn_tensor) { + if (QNN_TENSOR_VERSION_1 == qnn_tensor.version) { + return nullptr; // not present in v1 + } + +#ifdef QNN_TENSOR_V2_INIT + if (QNN_TENSOR_VERSION_2 == qnn_tensor.version) { + return qnn_tensor.v2.isDynamicDimensions; + } +#endif // QNN_TENSOR_V2_INIT + + ORT_THROW("QNN tensor version not supported, QNN tensor version: ", qnn_tensor.version); +} + Status CompareQnnQuantParams(const Qnn_QuantizeParams_t& qparam0, const Qnn_QuantizeParams_t& qparam1, float& scale_diff, int32_t& offset_diff) { scale_diff = 0.0f; diff --git
a/onnxruntime/core/providers/qnn/builder/qnn_def.h b/onnxruntime/core/providers/qnn/builder/qnn_def.h index ffd2dc9b11010..148fa115d40e5 100644 --- a/onnxruntime/core/providers/qnn/builder/qnn_def.h +++ b/onnxruntime/core/providers/qnn/builder/qnn_def.h @@ -9,8 +9,7 @@ #include #include #include -#include "core/graph/basic_types.h" -#include "core/common/common.h" +#include "core/providers/qnn/ort_api.h" #include "core/providers/qnn/builder/qnn_quant_params_wrapper.h" namespace onnxruntime { @@ -105,6 +104,7 @@ void SetQnnTensorClientBuf(Qnn_Tensor_t& qnn_tensor, const std::vector void SetQnnTensorClientBuf(Qnn_Tensor_t& qnn_tensor, void* buf_data, uint32_t buf_size); void SetQnnTensorClientBufSize(Qnn_Tensor_t& qnn_tensor, uint32_t client_buf_size); void SetQnnTensorClientBufData(Qnn_Tensor_t& qnn_tensor, void* client_buf_data); +void SetQnnTensorMemHandle(Qnn_Tensor_t& qnn_tensor, Qnn_MemHandle_t mem_handle); void SetQnnTensorQParams(Qnn_Tensor_t& qnn_tensor, const Qnn_QuantizeParams_t& quantize_params); bool CreateTensorInQnnGraph(const QNN_INTERFACE_VER_TYPE& qnn_interface, const Qnn_GraphHandle_t& graph, @@ -123,7 +123,9 @@ Qnn_TensorMemType_t GetQnnTensorMemType(const Qnn_Tensor_t& qnn_tensor); uint32_t GetQnnTensorRank(const Qnn_Tensor_t& qnn_tensor); uint32_t* GetQnnTensorDims(const Qnn_Tensor_t& qnn_tensor); const Qnn_ClientBuffer_t& GetQnnTensorClientBuf(const Qnn_Tensor_t& qnn_tensor); +Qnn_MemHandle_t GetQnnTensorMemHandle(const Qnn_Tensor_t& qnn_tensor); const Qnn_QuantizeParams_t& GetQnnTensorQParams(const Qnn_Tensor_t& qnn_tensor); +uint8_t* GetQnnTensorIsDynamicDimensions(const Qnn_Tensor_t& qnn_tensor); /** * Compares two sets of quantization parameters. Sets the parameters `scale_diff` and `offset_diff` @@ -465,11 +467,13 @@ class QnnOpProperty { class GraphInfo { public: - GraphInfo(const Qnn_GraphHandle_t graph, + GraphInfo(Qnn_GraphHandle_t graph, const std::string& name, + Qnn_ContextHandle_t graph_context, std::vector&& input_tensors, std::vector&& output_tensors) : graph_name_(name), graph_(graph), + graph_context_(graph_context), input_tensors_(std::move(input_tensors)), output_tensors_(std::move(output_tensors)) { } @@ -479,12 +483,15 @@ class GraphInfo { const std::string& Name() const { return graph_name_; } const std::vector& InputTensors() const { return input_tensors_; } const std::vector& OutputTensors() const { return output_tensors_; } - const Qnn_GraphHandle_t& Graph() const { return graph_; } + Qnn_GraphHandle_t Graph() const { return graph_; } + Qnn_ContextHandle_t GraphContext() const { return graph_context_; } ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(GraphInfo); private: std::string graph_name_; Qnn_GraphHandle_t graph_; + // QNN context that holds the QNN graph referenced by `graph_` + Qnn_ContextHandle_t graph_context_; std::vector input_tensors_; std::vector output_tensors_; }; diff --git a/onnxruntime/core/providers/qnn/builder/qnn_model.cc b/onnxruntime/core/providers/qnn/builder/qnn_model.cc index 4f73e4c532ed4..a9ccb9cc15206 100644 --- a/onnxruntime/core/providers/qnn/builder/qnn_model.cc +++ b/onnxruntime/core/providers/qnn/builder/qnn_model.cc @@ -4,30 +4,27 @@ #include "qnn_model.h" #include +#include #include "QnnOpDef.h" +#include "core/providers/qnn/ort_api.h" #include "core/providers/qnn/builder/op_builder_factory.h" #include "core/providers/qnn/builder/qnn_node_group.h" -#include "core/providers/shared/utils/utils.h" -#include "core/framework/utils.h" -#include "core/optimizer/qdq_transformer/selectors_actions/qdq_selectors.h" -#include 
"core/optimizer/qdq_transformer/selectors_actions/shared/utils.h" #include "core/providers/qnn/builder/qnn_utils.h" +#include "core/providers/qnn/qnn_allocator.h" +#include "core/providers/qnn/shared_context.h" namespace onnxruntime { namespace qnn { -bool QnnModel::GetGraphInfoFromModel(QnnModelWrapper& model_wrapper, const logging::Logger& logger) { +bool QnnModel::GetGraphInfoFromModel(QnnModelWrapper& model_wrapper, const logging::Logger& /* logger */) { bool rt = true; graph_info_ = std::make_unique(model_wrapper.GetQnnGraph(), model_wrapper.GetQnnGraphName(), + model_wrapper.GetQnnGraphContext(), std::move(model_wrapper.GetGraphInputTensorWrappers()), std::move(model_wrapper.GetGraphOutputTensorWrappers())); - if (graph_info_ == nullptr) { - LOGS(logger, ERROR) << "GetGraphInfoFromModel() failed to allocate GraphInfo."; - return false; - } return rt; } @@ -104,7 +101,7 @@ Status QnnModel::ComposeGraph(const GraphViewer& graph_viewer, // valid throughout the lifetime of the ModelBuilder std::vector> node_unit_holder; std::unordered_map node_unit_map; - std::tie(node_unit_holder, node_unit_map) = QDQ::GetAllNodeUnits(graph_viewer, logger); + std::tie(node_unit_holder, node_unit_map) = GetQDQNodeUnits(graph_viewer, logger); // This name must be same with the EPContext node name const auto& graph_name = fused_node.Name(); @@ -185,7 +182,33 @@ Status QnnModel::SetupQnnInputOutput(const logging::Logger& logger) { return Status::OK(); } -Status QnnModel::ExecuteGraph(const Ort::KernelContext& context, const logging::Logger& logger) { +static Status BindQnnTensorMemoryToOrtValueMemory(const logging::Logger& logger, + QnnBackendManager& qnn_backend_manager, + const OrtMemoryInfo& ort_value_memory_info, + void* ort_value_data, uint32_t ort_value_data_size, + Qnn_ContextHandle_t qnn_context, + Qnn_Tensor_t& qnn_tensor) { + // either set qnn_tensor memHandle or clientBuf + const bool uses_shared_memory = ort_value_memory_info == HtpSharedMemoryAllocator::AssociatedMemoryInfo(); + + if (!uses_shared_memory) { + LOGS(logger, VERBOSE) << "Setting Qnn_Tensor_t clientBuf to ORT tensor memory."; + SetQnnTensorMemType(qnn_tensor, QNN_TENSORMEMTYPE_RAW); + SetQnnTensorClientBuf(qnn_tensor, ort_value_data, ort_value_data_size); + } else { + LOGS(logger, VERBOSE) << "Setting Qnn_Tensor_t memHandle to ORT tensor shared memory."; + Qnn_MemHandle_t qnn_mem_handle{}; + ORT_RETURN_IF_ERROR(qnn_backend_manager.GetOrRegisterContextMemHandle(qnn_context, ort_value_data, qnn_tensor, + qnn_mem_handle)); + SetQnnTensorMemType(qnn_tensor, QNN_TENSORMEMTYPE_MEMHANDLE); + SetQnnTensorMemHandle(qnn_tensor, qnn_mem_handle); + } + + return Status::OK(); +} + +Status QnnModel::ExecuteGraph(const Ort::KernelContext& context, + const logging::Logger& logger) { LOGS(logger, VERBOSE) << "QnnModel::ExecuteGraphs"; const size_t num_inputs = context.GetInputCount(); const size_t num_outputs = context.GetOutputCount(); @@ -193,7 +216,7 @@ Status QnnModel::ExecuteGraph(const Ort::KernelContext& context, const logging:: ORT_RETURN_IF_NOT(qnn_output_infos_.size() == num_outputs, "Inconsistent output sizes"); using namespace qnn::utils; - auto TensorDataSize = [&](auto ort_tensor) -> size_t { + auto TensorDataSize = [](auto ort_tensor) -> size_t { auto tensor_type_and_shape = ort_tensor.GetTensorTypeAndShapeInfo(); size_t length = tensor_type_and_shape.GetElementCount(); ONNXTensorElementDataType element_type = tensor_type_and_shape.GetElementType(); @@ -210,13 +233,19 @@ Status QnnModel::ExecuteGraph(const Ort::KernelContext& 
context, const logging:: auto ort_input_tensor = context.GetInput(qnn_input_info.ort_index); auto ort_tensor_size = TensorDataSize(ort_input_tensor); LOGS(logger, VERBOSE) << "Qnn tensor size: " << qnn_input_info.tensor_byte_size - << "Ort tensor size: " << ort_tensor_size; + << " Ort tensor size: " << ort_tensor_size; ORT_RETURN_IF_NOT(qnn_input_info.tensor_byte_size == ort_tensor_size, "ORT Tensor data size does not match QNN tensor data size."); qnn_inputs.push_back(qnn_input_info.tensor_wrapper->GetQnnTensor()); - SetQnnTensorClientBuf(qnn_inputs.back(), - const_cast<void*>(ort_input_tensor.GetTensorData<void>()), qnn_input_info.tensor_byte_size); + + ORT_RETURN_IF_ERROR(BindQnnTensorMemoryToOrtValueMemory( + logger, + *qnn_backend_manager_, + *static_cast<const OrtMemoryInfo*>(ort_input_tensor.GetTensorMemoryInfo()), + const_cast<void*>(ort_input_tensor.GetTensorRawData()), qnn_input_info.tensor_byte_size, + graph_info_->GraphContext(), + qnn_inputs.back())); } std::vector<Qnn_Tensor_t> qnn_outputs; @@ -230,24 +259,30 @@ Status QnnModel::ExecuteGraph(const Ort::KernelContext& context, const logging:: auto ort_output_tensor = context.GetOutput(qnn_output_info.ort_index, output_shape.data(), output_shape.size()); auto ort_tensor_size = TensorDataSize(ort_output_tensor); LOGS(logger, VERBOSE) << "Qnn tensor size: " << qnn_output_info.tensor_byte_size - << "Ort tensor size: " << ort_tensor_size; + << " Ort tensor size: " << ort_tensor_size; ORT_RETURN_IF_NOT(qnn_output_info.tensor_byte_size == ort_tensor_size, "ORT Tensor data size does not match QNN tensor data size"); qnn_outputs.push_back(qnn_output_info.tensor_wrapper->GetQnnTensor()); - SetQnnTensorClientBuf(qnn_outputs.back(), - const_cast<void*>(ort_output_tensor.GetTensorData<void>()), qnn_output_info.tensor_byte_size); + + ORT_RETURN_IF_ERROR(BindQnnTensorMemoryToOrtValueMemory( + logger, + *qnn_backend_manager_, + *static_cast<const OrtMemoryInfo*>(ort_output_tensor.GetTensorMemoryInfo()), + ort_output_tensor.GetTensorMutableRawData(), qnn_output_info.tensor_byte_size, + graph_info_->GraphContext(), + qnn_outputs.back())); } - LOGS(logger, VERBOSE) << "Start execute QNN graph:" << graph_info_->Name(); - auto qnn_interface = qnn_backend_manager_->GetQnnInterface(); - auto profile_backend_handle = qnn_backend_manager_->GetQnnProfileHandle(); Qnn_ErrorHandle_t execute_status = QNN_GRAPH_NO_ERROR; - { - // Acquire mutex before calling graphExecute and profiling APIs to support calling session.Run() - // from multiple threads. + const auto& qnn_interface = qnn_backend_manager_->GetQnnInterface(); + + // Acquire mutex before calling QNN APIs to support calling session.Run() from multiple threads. std::lock_guard lock(graph_exec_mutex_); + + LOGS(logger, VERBOSE) << "Start execute QNN graph:" << graph_info_->Name(); + auto profile_backend_handle = qnn_backend_manager_->GetQnnProfileHandle(); execute_status = qnn_interface.graphExecute(graph_info_->Graph(), qnn_inputs.data(), static_cast<uint32_t>(qnn_inputs.size()), @@ -275,20 +310,6 @@ Status QnnModel::ExecuteGraph(const Ort::KernelContext& context, const logging:: return Status::OK(); } -Status QnnModel::GetQnnTensorDataLength(const std::vector<uint32_t>& dims, - Qnn_DataType_t data_type, - size_t& data_length) const { - ORT_RETURN_IF(dims.empty(), "Tensor dimensions is nullptr"); - - data_length = utils::GetElementSizeByType(data_type); - - for (size_t r = 0; r < dims.size(); r++) { - data_length *= dims[r]; - } - - return Status::OK(); -} - // Setup information for Qnn inputs/outputs used during execution.
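// Byte-size arithmetic used below, via utils::GetQnnTensorDataSizeInBytes:
//   size = element_size(data_type) * product(dims)
// e.g. a float32 tensor of shape {1, 3, 224, 224} occupies 4 * 1 * 3 * 224 * 224 = 602112 bytes.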
Status QnnModel::SetupTensors(std::vector<QnnTensorInfo>& qnn_tensor_infos, const std::vector<QnnTensorWrapper>& tensor_wrappers, @@ -298,11 +319,11 @@ Status QnnModel::SetupTensors(std::vector<QnnTensorInfo>& qnn_tensor_infos, qnn_tensor_infos.resize(tensor_count); for (auto& tensor_wrapper : tensor_wrappers) { - size_t length = 0; - using namespace qnn::utils; - ORT_RETURN_IF_ERROR(GetQnnTensorDataLength(tensor_wrapper.GetTensorDims(), - tensor_wrapper.GetTensorDataType(), - length)); + ORT_RETURN_IF(utils::QnnTensorHasDynamicShape(tensor_wrapper.GetQnnTensor()), + "QNN tensor (", tensor_wrapper.GetName(), ") has dynamic shape. This is not supported yet."); + + const size_t length = utils::GetQnnTensorDataSizeInBytes(tensor_wrapper.GetTensorDims(), + tensor_wrapper.GetTensorDataType()); const auto& tensor_name = tensor_wrapper.GetName(); auto qnn_index = is_input ? GetGraphInputIndex(tensor_name) : GetOutputIndex(tensor_name); auto ort_index = is_input ? GetOrtInputIndex(tensor_name) : qnn_index; @@ -379,9 +400,9 @@ Status QnnModel::DeserializeGraphInfoFromBinaryInfo(const QnnSystemContext_Graph graph_info_ = std::make_unique<GraphInfo>(graph, graph_name, + context, std::move(input_tensor_wrappers), std::move(output_tensor_wrappers)); - ORT_RETURN_IF(graph_info_ == nullptr, "Failed to allocate GraphInfo"); return Status::OK(); } diff --git a/onnxruntime/core/providers/qnn/builder/qnn_model.h b/onnxruntime/core/providers/qnn/builder/qnn_model.h index 2e0935391ca78..3a2a080aa391f 100644 --- a/onnxruntime/core/providers/qnn/builder/qnn_model.h +++ b/onnxruntime/core/providers/qnn/builder/qnn_model.h @@ -3,16 +3,14 @@ #pragma once +#include #include -#include "core/common/status.h" -#include "core/framework/node_unit.h" -#include "core/graph/graph_viewer.h" -#include +#include "core/providers/qnn/ort_api.h" #include "core/providers/qnn/builder/qnn_def.h" #include "core/providers/qnn/builder/qnn_model_wrapper.h" #include "core/providers/qnn/builder/qnn_backend_manager.h" -#include "core/session/onnxruntime_cxx_api.h" +#include "core/providers/qnn/rpcmem_library.h" namespace onnxruntime { namespace qnn { @@ -43,7 +41,8 @@ class QnnModel { Status SetupQnnInputOutput(const logging::Logger& logger); - Status ExecuteGraph(const Ort::KernelContext& context, const logging::Logger& logger); + Status ExecuteGraph(const Ort::KernelContext& context, + const logging::Logger& logger); const OnnxTensorInfo* GetOutputInfo(const std::string& name) const { auto it = outputs_info_.find(name); @@ -111,10 +110,6 @@ class QnnModel { const std::unordered_map<const Node*, const NodeUnit*>& node_unit_map) const; bool GetGraphInfoFromModel(QnnModelWrapper& model_wrapper, const logging::Logger& logger); - Status GetQnnTensorDataLength(const std::vector<uint32_t>& dims, - Qnn_DataType_t data_type, - size_t& data_length) const; - Status SetupTensors(std::vector<QnnTensorInfo>& tensors, const std::vector<QnnTensorWrapper>& tensor_wrappers, bool is_input = true); diff --git a/onnxruntime/core/providers/qnn/builder/qnn_model_wrapper.cc b/onnxruntime/core/providers/qnn/builder/qnn_model_wrapper.cc index 129a015164ad4..6bd12959afbdf 100644 --- a/onnxruntime/core/providers/qnn/builder/qnn_model_wrapper.cc +++ b/onnxruntime/core/providers/qnn/builder/qnn_model_wrapper.cc @@ -1,6 +1,8 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License.
+#include "core/providers/qnn/builder/qnn_model_wrapper.h" + #include #include #include @@ -8,10 +10,7 @@ #include #include -#include "qnn_model_wrapper.h" -#include "core/common/safeint.h" -#include "core/framework/tensorprotoutils.h" -#include "core/providers/shared/utils/utils.h" +#include "core/providers/qnn/ort_api.h" #include "core/providers/qnn/builder/qnn_utils.h" namespace onnxruntime { @@ -30,21 +29,23 @@ bool QnnModelWrapper::CreateQnnGraph(const Qnn_ContextHandle_t& context, return false; } if (graph_name.length() == 0) { - LOGS(logger_, ERROR) << "Empty grpah name."; + LOGS(logger_, ERROR) << "Empty graph name."; return false; } - graph_name_ = graph_name; - auto rt = qnn_interface_.graphCreate(context, graph_name_.c_str(), graph_configs, &graph_); + auto rt = qnn_interface_.graphCreate(context, graph_name.c_str(), graph_configs, &graph_); if (rt != QNN_GRAPH_NO_ERROR || graph_ == nullptr) { - rt = qnn_interface_.graphRetrieve(context, graph_name_.c_str(), &graph_); + rt = qnn_interface_.graphRetrieve(context, graph_name.c_str(), &graph_); if (rt != QNN_GRAPH_NO_ERROR || graph_ == nullptr) { LOGS(logger_, ERROR) << "Failed to create Qnn graph: " << graph_name; return false; } } + LOGS(logger_, VERBOSE) << "Created Qnn graph: " << graph_name; + graph_name_ = graph_name; + graph_context_ = context; return true; } @@ -73,6 +74,20 @@ Status QnnModelWrapper::MakeTensorWrapper(const NodeUnitIODef& tensor, QnnTensor return Status::OK(); } +Status QnnModelWrapper::MakeTensorWrapper(const TensorInfo& tensor_info, + const std::string& tensor_name, + QnnTensorWrapper& tensor_wrapper) const { + std::vector unpacked_tensor; + if (tensor_info.is_initializer) { + ORT_RETURN_IF_ERROR(UnpackInitializerData(*tensor_info.initializer_tensor, unpacked_tensor)); + } + + tensor_wrapper = QnnTensorWrapper(tensor_name, GetTensorType(tensor_name), tensor_info.qnn_data_type, + tensor_info.quant_param.Copy(), std::vector(tensor_info.shape), + std::move(unpacked_tensor)); + return Status::OK(); +} + bool QnnModelWrapper::AddTensorWrapper(QnnTensorWrapper&& tensor_wrapper) { // Keep a copy of tensor name sine it will be moved with the wrapper into model_tensors_map_ std::string tensor_name = tensor_wrapper.GetName(); @@ -445,7 +460,7 @@ Status QnnModelWrapper::IsPerChannelQuantized(const onnxruntime::NodeUnitIODef& ORT_RETURN_IF(iter == graph_initializers.end(), "Unable to find initializer for scale(s): ", scale_name.c_str()); gsl::not_null scale_tensor_proto = iter->second; - TensorShape scale_shape = onnxruntime::utils::GetTensorShapeFromTensorProto(*scale_tensor_proto); + TensorShape scale_shape(qnn::utils::GetInitializerShape(*scale_tensor_proto)); // Check the number of scale values to determine if the tensor is per-channel. // This is consistent with CPU EP's Quant/Dequant logic. We can't use the presence of an axis because even a @@ -620,29 +635,13 @@ Status QnnModelWrapper::UnpackInitializerData(const ONNX_NAMESPACE::TensorProto& // If this is an int4, we need to unpack it because QNN treats int4 as a full int8. 
if (onnx_data_type == ONNX_NAMESPACE::TensorProto_DataType_INT4) { - TensorShape shape = onnxruntime::utils::GetTensorShapeFromTensorProto(initializer); - const size_t num_elems = shape.Size(); - std::vector<uint8_t> packed_int4_bytes = std::move(unpacked_tensor); - unpacked_tensor = std::vector<uint8_t>(num_elems); - - auto dst = gsl::make_span(reinterpret_cast<int8_t*>(unpacked_tensor.data()), unpacked_tensor.size()); - auto src = gsl::make_span(reinterpret_cast<const Int4x2*>(packed_int4_bytes.data()), packed_int4_bytes.size()); - ORT_RETURN_IF_NOT(Int4x2::Unpack(dst, src), "Failed to unpack Tensor for QNN"); - - // NOTE: Masking off top 4 bits to workaround a QNN INT4 accuracy bug. - // Docs explicitly state that masking off top 4 bits should not be required. - for (size_t i = 0; i < dst.size(); i++) { - dst[i] &= 0x0F; // -3 (0b1111_1101) becomes 13 (0b0000_1101) - } + TensorShape shape(qnn::utils::GetInitializerShape<int64_t>(initializer)); + const size_t num_int4_elems = shape.Size(); + ORT_RETURN_IF_ERROR(qnn::utils::UnpackInt4ToInt8<true>(num_int4_elems, unpacked_tensor)); } else if (onnx_data_type == ONNX_NAMESPACE::TensorProto_DataType_UINT4) { - TensorShape shape = onnxruntime::utils::GetTensorShapeFromTensorProto(initializer); - const size_t num_elems = shape.Size(); - std::vector<uint8_t> packed_int4_bytes = std::move(unpacked_tensor); - unpacked_tensor = std::vector<uint8_t>(num_elems); - - auto dst = gsl::make_span(reinterpret_cast<uint8_t*>(unpacked_tensor.data()), unpacked_tensor.size()); - auto src = gsl::make_span(reinterpret_cast<const UInt4x2*>(packed_int4_bytes.data()), packed_int4_bytes.size()); - ORT_RETURN_IF_NOT(UInt4x2::Unpack(dst, src), "Failed to unpack Tensor for QNN"); + TensorShape shape(qnn::utils::GetInitializerShape<int64_t>(initializer)); + const size_t num_uint4_elems = shape.Size(); + ORT_RETURN_IF_ERROR(qnn::utils::UnpackInt4ToInt8<false>(num_uint4_elems, unpacked_tensor)); } return Status::OK(); diff --git a/onnxruntime/core/providers/qnn/builder/qnn_model_wrapper.h b/onnxruntime/core/providers/qnn/builder/qnn_model_wrapper.h index 875c93a68672d..203250204d7f8 100644 --- a/onnxruntime/core/providers/qnn/builder/qnn_model_wrapper.h +++ b/onnxruntime/core/providers/qnn/builder/qnn_model_wrapper.h @@ -7,13 +7,10 @@ #include #include -#include "core/common/status.h" #include "QnnInterface.h" #include "qnn_def.h" -#include "core/common/logging/logging.h" -#include "core/framework/node_unit.h" -#include "core/graph/graph_viewer.h" -#include "core/providers/shared/utils/utils.h" + +#include "core/providers/qnn/ort_api.h" #include "core/providers/qnn/builder/qnn_quant_params_wrapper.h" namespace onnxruntime { @@ -66,6 +63,9 @@ class QnnModelWrapper { // Make a QnnTensorWrapper from an onnx input or output.
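// The TensorInfo-based overload added below constructs the wrapper from already-resolved
// metadata (QNN data type, quant params, shape, optional initializer bytes), saving op
// builders from re-deriving that information out of the NodeUnitIODef.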
Status MakeTensorWrapper(const NodeUnitIODef& tensor, QnnTensorWrapper& tensor_wrapper) const; + Status MakeTensorWrapper(const TensorInfo& tensor_info, + const std::string& tensor_name, + QnnTensorWrapper& tensor_wrapper) const; // Add to internal tensor wrapper table bool AddTensorWrapper(QnnTensorWrapper&& tensor_wrapper); @@ -93,10 +93,12 @@ class QnnModelWrapper { bool ComposeQnnGraph(); - Qnn_GraphHandle_t GetQnnGraph() { return graph_; } + Qnn_GraphHandle_t GetQnnGraph() const { return graph_; } std::string GetQnnGraphName() const { return graph_name_; } + Qnn_ContextHandle_t GetQnnGraphContext() const { return graph_context_; } + // Move input tensor wrappers to GraphInfo, QnnModelWrapper end of live std::vector&& GetGraphInputTensorWrappers() { GetGraphInputOutputTensorWrapper(model_input_names_, model_input_tensor_wrappers_); @@ -299,6 +301,8 @@ class QnnModelWrapper { const Qnn_BackendHandle_t& backend_handle_; Qnn_GraphHandle_t graph_ = nullptr; std::string graph_name_ = ""; + // QNN context that holds the QNN graph referenced by `graph_` + Qnn_ContextHandle_t graph_context_ = nullptr; std::vector model_input_names_; std::vector model_output_names_; diff --git a/onnxruntime/core/providers/qnn/builder/qnn_node_group.h b/onnxruntime/core/providers/qnn/builder/qnn_node_group.h index f9ef01411310f..276fbaae3b3c9 100644 --- a/onnxruntime/core/providers/qnn/builder/qnn_node_group.h +++ b/onnxruntime/core/providers/qnn/builder/qnn_node_group.h @@ -8,8 +8,7 @@ #include #include -#include "core/common/logging/logging.h" -#include "core/framework/node_unit.h" +#include "core/providers/qnn/ort_api.h" namespace onnxruntime { namespace qnn { diff --git a/onnxruntime/core/providers/qnn/builder/qnn_node_group/dq_q_fusion.cc b/onnxruntime/core/providers/qnn/builder/qnn_node_group/dq_q_fusion.cc index caf4725626338..3af2fdd1f0276 100644 --- a/onnxruntime/core/providers/qnn/builder/qnn_node_group/dq_q_fusion.cc +++ b/onnxruntime/core/providers/qnn/builder/qnn_node_group/dq_q_fusion.cc @@ -6,9 +6,8 @@ #include #include #include -#include "core/graph/graph_utils.h" -#include "core/framework/node_unit.h" -#include "core/providers/shared/utils/utils.h" + +#include "core/providers/qnn/ort_api.h" #include "core/providers/qnn/builder/qnn_utils.h" #include "core/providers/qnn/builder/op_builder_factory.h" #include "core/providers/qnn/builder/qnn_node_group/utils.h" diff --git a/onnxruntime/core/providers/qnn/builder/qnn_node_group/dq_q_fusion.h b/onnxruntime/core/providers/qnn/builder/qnn_node_group/dq_q_fusion.h index 90fe44c3af059..d3d552bc172ec 100644 --- a/onnxruntime/core/providers/qnn/builder/qnn_node_group/dq_q_fusion.h +++ b/onnxruntime/core/providers/qnn/builder/qnn_node_group/dq_q_fusion.h @@ -7,8 +7,7 @@ #include #include -#include "core/common/common.h" -#include "core/framework/node_unit.h" +#include "core/providers/qnn/ort_api.h" #include "core/providers/qnn/builder/qnn_node_group.h" namespace onnxruntime { diff --git a/onnxruntime/core/providers/qnn/builder/qnn_node_group/hardsigmoid_mul_fusion.cc b/onnxruntime/core/providers/qnn/builder/qnn_node_group/hardsigmoid_mul_fusion.cc index 76b1726646486..5094ad96724f5 100644 --- a/onnxruntime/core/providers/qnn/builder/qnn_node_group/hardsigmoid_mul_fusion.cc +++ b/onnxruntime/core/providers/qnn/builder/qnn_node_group/hardsigmoid_mul_fusion.cc @@ -6,9 +6,8 @@ #include #include #include -#include "core/graph/graph_utils.h" -#include "core/framework/node_unit.h" -#include "core/providers/shared/utils/utils.h" + +#include 
"core/providers/qnn/ort_api.h" #include "core/providers/qnn/builder/qnn_utils.h" #include "core/providers/qnn/builder/op_builder_factory.h" #include "core/providers/qnn/builder/qnn_model_wrapper.h" diff --git a/onnxruntime/core/providers/qnn/builder/qnn_node_group/hardsigmoid_mul_fusion.h b/onnxruntime/core/providers/qnn/builder/qnn_node_group/hardsigmoid_mul_fusion.h index 3b67f13492a46..0a1b16d24ffcd 100644 --- a/onnxruntime/core/providers/qnn/builder/qnn_node_group/hardsigmoid_mul_fusion.h +++ b/onnxruntime/core/providers/qnn/builder/qnn_node_group/hardsigmoid_mul_fusion.h @@ -7,8 +7,7 @@ #include #include -#include "core/common/common.h" -#include "core/framework/node_unit.h" +#include "core/providers/qnn/ort_api.h" #include "core/providers/qnn/builder/qnn_node_group.h" namespace onnxruntime { diff --git a/onnxruntime/core/providers/qnn/builder/qnn_node_group/qnn_node_group.cc b/onnxruntime/core/providers/qnn/builder/qnn_node_group/qnn_node_group.cc index c398d1fae5097..e947da1a60e7a 100644 --- a/onnxruntime/core/providers/qnn/builder/qnn_node_group/qnn_node_group.cc +++ b/onnxruntime/core/providers/qnn/builder/qnn_node_group/qnn_node_group.cc @@ -10,8 +10,7 @@ #include #include #include -#include "core/graph/graph_utils.h" -#include "core/framework/node_unit.h" +#include "core/providers/qnn/ort_api.h" #include "core/providers/qnn/builder/qnn_utils.h" #include "core/providers/qnn/builder/qnn_model_wrapper.h" #include "core/providers/qnn/builder/op_builder_factory.h" diff --git a/onnxruntime/core/providers/qnn/builder/qnn_node_group/utils.cc b/onnxruntime/core/providers/qnn/builder/qnn_node_group/utils.cc index 5548d7d37c378..93b2fca296389 100644 --- a/onnxruntime/core/providers/qnn/builder/qnn_node_group/utils.cc +++ b/onnxruntime/core/providers/qnn/builder/qnn_node_group/utils.cc @@ -4,8 +4,7 @@ #include #include -#include "core/graph/graph_viewer.h" -#include "core/framework/node_unit.h" +#include "core/providers/qnn/ort_api.h" #include "core/providers/qnn/builder/qnn_node_group.h" namespace onnxruntime { diff --git a/onnxruntime/core/providers/qnn/builder/qnn_node_group/utils.h b/onnxruntime/core/providers/qnn/builder/qnn_node_group/utils.h index 0d11d21906ccb..c4cf4e8a20a92 100644 --- a/onnxruntime/core/providers/qnn/builder/qnn_node_group/utils.h +++ b/onnxruntime/core/providers/qnn/builder/qnn_node_group/utils.h @@ -7,8 +7,7 @@ #include #include -#include "core/graph/graph_viewer.h" -#include "core/framework/node_unit.h" +#include "core/providers/qnn/ort_api.h" #include "core/providers/qnn/builder/qnn_node_group.h" namespace onnxruntime { diff --git a/onnxruntime/core/providers/qnn/builder/qnn_quant_params_wrapper.h b/onnxruntime/core/providers/qnn/builder/qnn_quant_params_wrapper.h index 23330f5616d73..01c15cf4bebe6 100644 --- a/onnxruntime/core/providers/qnn/builder/qnn_quant_params_wrapper.h +++ b/onnxruntime/core/providers/qnn/builder/qnn_quant_params_wrapper.h @@ -4,10 +4,10 @@ #pragma once #include #include -#include "QnnTypes.h" -#include "core/common/common.h" #include -#include "core/framework/node_unit.h" + +#include "core/providers/qnn/ort_api.h" +#include "QnnTypes.h" namespace onnxruntime { namespace qnn { diff --git a/onnxruntime/core/providers/qnn/builder/qnn_utils.cc b/onnxruntime/core/providers/qnn/builder/qnn_utils.cc index 8d2cb5bdb6da0..56c3d3e803d9b 100644 --- a/onnxruntime/core/providers/qnn/builder/qnn_utils.cc +++ b/onnxruntime/core/providers/qnn/builder/qnn_utils.cc @@ -1,15 +1,17 @@ // Copyright (c) Microsoft Corporation. All rights reserved. 
// Licensed under the MIT License. +#include "core/providers/qnn/builder/qnn_utils.h" + +#include #include +#include +#include #include #include #include -#include -#include "core/common/common.h" -#include "core/framework/data_types.h" -#include "qnn_utils.h" +#include "core/providers/qnn/ort_api.h" #include "core/providers/qnn/builder/qnn_def.h" namespace onnxruntime { @@ -63,6 +65,59 @@ size_t GetElementSizeByType(ONNXTensorElementDataType elem_type) { return pos->second; } +size_t GetElementSizeByType(ONNX_NAMESPACE::TensorProto_DataType onnx_type) { + switch (onnx_type) { + case ONNX_NAMESPACE::TensorProto_DataType_INT4: + return sizeof(Int4x2); + case ONNX_NAMESPACE::TensorProto_DataType_UINT4: + return sizeof(UInt4x2); + case ONNX_NAMESPACE::TensorProto_DataType_INT8: + return sizeof(int8_t); + case ONNX_NAMESPACE::TensorProto_DataType_UINT8: + return sizeof(uint8_t); + case ONNX_NAMESPACE::TensorProto_DataType_INT16: + return sizeof(int16_t); + case ONNX_NAMESPACE::TensorProto_DataType_UINT16: + return sizeof(uint16_t); + case ONNX_NAMESPACE::TensorProto_DataType_INT32: + return sizeof(int32_t); + case ONNX_NAMESPACE::TensorProto_DataType_UINT32: + return sizeof(uint32_t); + case ONNX_NAMESPACE::TensorProto_DataType_INT64: + return sizeof(int64_t); + case ONNX_NAMESPACE::TensorProto_DataType_UINT64: + return sizeof(uint64_t); + case ONNX_NAMESPACE::TensorProto_DataType_FLOAT16: + return 2; + case ONNX_NAMESPACE::TensorProto_DataType_FLOAT: + return sizeof(float); + case ONNX_NAMESPACE::TensorProto_DataType_DOUBLE: + return sizeof(double); + case ONNX_NAMESPACE::TensorProto_DataType_BOOL: + return sizeof(bool); + default: + return 0; + } + // Unreachable +} + +size_t GetQnnTensorDataSizeInBytes(gsl::span<const uint32_t> shape, Qnn_DataType_t element_type) { + ORT_ENFORCE(!shape.empty(), "Empty shape not allowed."); // TODO can we just treat empty shape as a scalar?
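+ // SafeInt<size_t> makes the accumulation below overflow-checked: an overflowing
+ // product throws instead of silently wrapping, so a malformed shape surfaces as an
+ // error rather than an undersized buffer.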
+ SafeInt<size_t> data_length = GetElementSizeByType(element_type); + return std::accumulate(shape.begin(), shape.end(), data_length, std::multiplies<>{}); +} + +bool QnnTensorHasDynamicShape(const Qnn_Tensor_t& tensor) { + const uint8_t* is_dynamic_dimensions = GetQnnTensorIsDynamicDimensions(tensor); + if (is_dynamic_dimensions == nullptr) { + return false; + } + + const auto rank = GetQnnTensorRank(tensor); + return std::any_of(is_dynamic_dimensions, is_dynamic_dimensions + rank, + [](uint8_t is_dynamic_dimension) { return is_dynamic_dimension != 0; }); +} + std::ostream& operator<<(std::ostream& out, const Qnn_Scalar_t& scalar) { switch (scalar.dataType) { case QNN_DATATYPE_INT_8: @@ -487,39 +542,22 @@ bool OnnxDataTypeToQnnDataType(const int32_t onnx_data_type, Qnn_DataType_t& qnn } std::pair<float, float> CheckMinMax(float rmin, float rmax) { - // Ensure a minimum range of 0.0001 (required by QNN) - rmax = std::max(rmax, rmin + 0.0001f); - // Both QNN and ORT require the range to include 0.0f rmin = std::min(rmin, 0.0f); rmax = std::max(rmax, 0.0f); + // Ensure a minimum range of 0.0001 (required by QNN) + rmax = std::max(rmax, rmin + 0.0001f); + return std::make_pair(rmin, rmax); } -template <typename T> -Status GetQminQmax(const Qnn_DataType_t qnn_data_type, - T& qmin, - T& qmax) { - if (qnn_data_type == QNN_DATATYPE_SFIXED_POINT_8) { - qmin = static_cast<T>(std::numeric_limits<int8_t>::min()); - qmax = static_cast<T>(std::numeric_limits<int8_t>::max()); - } else if (qnn_data_type == QNN_DATATYPE_UFIXED_POINT_8) { - qmin = static_cast<T>(std::numeric_limits<uint8_t>::min()); - qmax = static_cast<T>(std::numeric_limits<uint8_t>::max()); - } else if (qnn_data_type == QNN_DATATYPE_SFIXED_POINT_16) { - qmin = static_cast<T>(std::numeric_limits<int16_t>::min()); - qmax = static_cast<T>(std::numeric_limits<int16_t>::max()); - } else if (qnn_data_type == QNN_DATATYPE_UFIXED_POINT_16) { - qmin = static_cast<T>(std::numeric_limits<uint16_t>::min()); - qmax = static_cast<T>(std::numeric_limits<uint16_t>::max()); - } else if (qnn_data_type == QNN_DATATYPE_SFIXED_POINT_32) { - qmin = static_cast<T>(std::numeric_limits<int32_t>::min()); - qmax = static_cast<T>(std::numeric_limits<int32_t>::max()); - } else { - ORT_RETURN_IF(true, "Qnn Data Type: %d not supported yet.", qnn_data_type); +inline float RoundHalfToEven(float input) { + if (!std::isfinite(input)) { + return input; } - return Status::OK(); + // std::remainder returns x - n, where n is the integral value nearest to x. When |x - n| = 0.5, n is chosen to be even.
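+ // e.g. RoundHalfToEven(2.5f) == 2.0f, RoundHalfToEven(3.5f) == 4.0f, RoundHalfToEven(-2.5f) == -2.0f.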
+ return input - std::remainderf(input, 1.f); } Status GetQuantParams(float rmin, @@ -535,20 +573,22 @@ Status GetQuantParams(float rmin, rmin = -abs_max; } - float qmin = 0.0f; - float qmax = 255.0f; - ORT_RETURN_IF_ERROR(GetQminQmax(qnn_data_type, qmin, qmax)); + double rmin_dbl = static_cast<double>(rmin); + double rmax_dbl = static_cast<double>(rmax); + double qmin = 0.0; + double qmax = 0.0; + ORT_RETURN_IF_ERROR(GetQminQmax(qnn_data_type, qmin, qmax, symmetric)); - scale = (rmax - rmin) / (qmax - qmin); - float initial_zero_point = 0.0f; + double scale_dbl = (rmax_dbl - rmin_dbl) / (qmax - qmin); + double initial_zero_point = 0.0; if (symmetric) { - initial_zero_point = std::round(rmin + rmax) / 2; + initial_zero_point = std::round(rmin_dbl + rmax_dbl) / 2; } else { - initial_zero_point = qmin - (rmin / scale); + initial_zero_point = qmin - (rmin_dbl / scale_dbl); } - zero_point = static_cast<int32_t>(RoundHalfToEven(Saturate(qmax, qmin, initial_zero_point))); - // To match QNN quantization definition - zero_point = 0 - zero_point; + zero_point = static_cast<int32_t>(RoundHalfToEven(static_cast<float>(Saturate(qmax, qmin, initial_zero_point)))); + zero_point = -zero_point; // Negate to match QNN quantization definition. + scale = static_cast<float>(scale_dbl); return Status::OK(); }
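For intuition, a worked asymmetric uint8 example under the double-precision math above (values rounded for display):

rmin = -1.0, rmax = 3.0, qmin = 0, qmax = 255
scale  = (3.0 - (-1.0)) / (255 - 0)     ≈ 0.0156863
zp0    = qmin - rmin / scale = 0 - (-63.75) = 63.75
zp     = RoundHalfToEven(63.75)         = 64
offset = -zp                            = -64   (QNN stores the negated zero point)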
@@ -570,6 +610,147 @@ Status Quantize(const double double_value, return Status::OK(); } +size_t ShapeSizeCalc(gsl::span<const uint32_t> shape, size_t start, size_t end) { + size_t size = 1; + for (size_t i = start; i < end; i++) { + size *= shape[i]; + } + return size; +} + +Status GetDataQuantParams(gsl::span<const float> data, gsl::span<const uint32_t> shape, + /*out*/ gsl::span<float> scales, /*out*/ gsl::span<int32_t> offsets, + Qnn_DataType_t data_type, bool symmetric, std::optional<int64_t> axis) { + const size_t num_dims = shape.size(); + const size_t num_elems = ShapeSizeCalc(shape, 0, num_dims); + ORT_RETURN_IF_NOT(num_elems == data.size(), "Shape mismatch with data to quantize"); + + size_t block_count = 1; + size_t broadcast_dim = 1; + size_t block_size = num_elems; + + if (axis.has_value()) { + size_t axis_no_neg = *axis < 0 ? static_cast<size_t>(*axis) + num_dims : static_cast<size_t>(*axis); + block_count = ShapeSizeCalc(shape, 0, axis_no_neg); + broadcast_dim = shape[axis_no_neg]; + block_size = ShapeSizeCalc(shape, axis_no_neg + 1, num_dims); + } + + ORT_RETURN_IF_NOT(scales.size() == broadcast_dim, "Unexpected size of scales output buffer"); + ORT_RETURN_IF_NOT(offsets.size() == broadcast_dim, "Unexpected size of offsets output buffer"); + + size_t i = 0; + for (size_t n = 0; n < block_count; n++) { + for (size_t bd = 0; bd < broadcast_dim; bd++) { + float rmin = std::numeric_limits<float>::max(); + float rmax = std::numeric_limits<float>::lowest(); + for (size_t j = 0; j < block_size; j++) { + rmin = std::min(rmin, data[i]); + rmax = std::max(rmax, data[i]); + i++; + } + + scales[bd] = 1.0f; + offsets[bd] = 0; + ORT_RETURN_IF_ERROR(GetQuantParams(rmin, rmax, data_type, scales[bd], offsets[bd], symmetric)); + } + } + + assert(i == data.size()); + return Status::OK(); +} + +Status QuantizeData(gsl::span<const float> data, gsl::span<const uint32_t> shape, + gsl::span<const float> scales, gsl::span<const int32_t> offsets, + /*out*/ gsl::span<uint8_t> quant_bytes, Qnn_DataType_t data_type, + std::optional<int64_t> axis) { + const size_t num_dims = shape.size(); + const size_t num_elems = ShapeSizeCalc(shape, 0, num_dims); + ORT_RETURN_IF_NOT(num_elems == data.size(), "Shape mismatch with data to quantize"); + size_t expected_num_quant_bytes = GetElementSizeByType(data_type) * data.size(); + ORT_RETURN_IF_NOT(quant_bytes.size() == expected_num_quant_bytes, + "Cannot quantize data because output buffer is not the correct size"); + + size_t block_count = 1; + size_t broadcast_dim = 1; + size_t block_size = num_elems; + + if (axis.has_value()) { + size_t axis_no_neg = *axis < 0 ? static_cast<size_t>(*axis) + num_dims : static_cast<size_t>(*axis); + block_count = ShapeSizeCalc(shape, 0, axis_no_neg); + broadcast_dim = shape[axis_no_neg]; + block_size = ShapeSizeCalc(shape, axis_no_neg + 1, num_dims); + } + + ORT_RETURN_IF_NOT(scales.size() == broadcast_dim, "Unexpected size of scales output buffer"); + ORT_RETURN_IF_NOT(offsets.size() == broadcast_dim, "Unexpected size of offsets output buffer"); + + size_t i = 0; + for (size_t n = 0; n < block_count; n++) { + for (size_t bd = 0; bd < broadcast_dim; bd++) { + switch (data_type) { + case QNN_DATATYPE_SFIXED_POINT_8: { + auto input_span = gsl::make_span(&data[i], block_size); + auto output_span = gsl::make_span(&quant_bytes[i * sizeof(int8_t)], sizeof(int8_t) * block_size); + ORT_RETURN_IF_ERROR(QuantizeData<int8_t>(input_span, scales[bd], offsets[bd], output_span)); + break; + } + case QNN_DATATYPE_UFIXED_POINT_8: { + auto input_span = gsl::make_span(&data[i], block_size); + auto output_span = gsl::make_span(&quant_bytes[i * sizeof(uint8_t)], sizeof(uint8_t) * block_size); + ORT_RETURN_IF_ERROR(QuantizeData<uint8_t>(input_span, scales[bd], offsets[bd], output_span)); + break; + } + case QNN_DATATYPE_SFIXED_POINT_16: { + auto input_span = gsl::make_span(&data[i], block_size); + auto output_span = gsl::make_span(&quant_bytes[i * sizeof(int16_t)], sizeof(int16_t) * block_size); + ORT_RETURN_IF_ERROR(QuantizeData<int16_t>(input_span, scales[bd], offsets[bd], output_span)); + break; + } + case QNN_DATATYPE_UFIXED_POINT_16: { + auto input_span = gsl::make_span(&data[i], block_size); + auto output_span = gsl::make_span(&quant_bytes[i * sizeof(uint16_t)], sizeof(uint16_t) * block_size); + ORT_RETURN_IF_ERROR(QuantizeData<uint16_t>(input_span, scales[bd], offsets[bd], output_span)); + break; + } + case QNN_DATATYPE_SFIXED_POINT_32: { + auto input_span = gsl::make_span(&data[i], block_size); + auto output_span =
gsl::make_span(&quant_bytes[i * sizeof(int32_t)], sizeof(int32_t) * block_size); + ORT_RETURN_IF_ERROR(QuantizeData<int32_t>(input_span, scales[bd], offsets[bd], output_span)); + break; + } + default: + return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "Unsupported quantization data type for QuantizeData"); + } + i += block_size; + } + } + assert(i == data.size()); + + return Status::OK(); +} + +std::string_view GetQnnErrorMessage(const QNN_INTERFACE_VER_TYPE& qnn_interface, Qnn_ErrorHandle_t qnn_error_handle) { + // From QNN SDK: The memory is statically owned and should not be freed by the caller. + const char* error_msg = nullptr; + if (qnn_interface.errorGetMessage(qnn_error_handle, &error_msg) == QNN_SUCCESS) { + return error_msg; + } + return "Unknown error."; +} + +std::string GetVerboseQnnErrorMessage(const QNN_INTERFACE_VER_TYPE& qnn_interface, + Qnn_ErrorHandle_t qnn_error_handle) { + const char* error_msg = nullptr; + if (qnn_interface.errorGetVerboseMessage(qnn_error_handle, &error_msg) == QNN_SUCCESS) { + auto free_error_msg = gsl::finally([&qnn_interface, error_msg] { + qnn_interface.errorFreeVerboseMessage(error_msg); + }); + return error_msg; + } + return "Unknown error."; +} + } // namespace utils } // namespace qnn } // namespace onnxruntime diff --git a/onnxruntime/core/providers/qnn/builder/qnn_utils.h b/onnxruntime/core/providers/qnn/builder/qnn_utils.h index aa4a27460563f..853debb61a12f 100644 --- a/onnxruntime/core/providers/qnn/builder/qnn_utils.h +++ b/onnxruntime/core/providers/qnn/builder/qnn_utils.h @@ -2,16 +2,21 @@ // Licensed under the MIT License. #pragma once +#include #include #include #include +#include #include +#include #include +#include + +#include "QnnInterface.h" #include "QnnTypes.h" -#include "core/session/onnxruntime_cxx_api.h" -#include "core/framework/node_unit.h" -#include "core/util/qmath.h" + +#include "core/providers/qnn/ort_api.h" namespace onnxruntime { namespace qnn { @@ -22,6 +27,12 @@ size_t GetElementSizeByType(const Qnn_DataType_t& data_type); size_t GetElementSizeByType(ONNXTensorElementDataType elem_type); +size_t GetElementSizeByType(ONNX_NAMESPACE::TensorProto_DataType onnx_type); + +size_t GetQnnTensorDataSizeInBytes(gsl::span<const uint32_t> shape, Qnn_DataType_t element_data_type); + +bool QnnTensorHasDynamicShape(const Qnn_Tensor_t& tensor); + // TODO: make these work with Wrappers?
 // TODO: make these work with Wrappers?
 std::ostream& operator<<(std::ostream& out, const Qnn_Param_t& qnn_param);
 std::ostream& operator<<(std::ostream& out, const Qnn_Tensor_t& tensor);
 
@@ -74,7 +85,30 @@ static bool ArrayHasString(const std::array<std::string_view, N>& strings, std:
 std::pair<float, float> CheckMinMax(float rmin, float rmax);
 
 template <typename T>
-Status GetQminQmax(const Qnn_DataType_t qnn_data_type, T& qmin, T& qmax);
+Status GetQminQmax(const Qnn_DataType_t qnn_data_type,
+                   T& qmin,
+                   T& qmax,
+                   bool symmetric = false) {
+  if (qnn_data_type == QNN_DATATYPE_SFIXED_POINT_8) {
+    qmin = static_cast<T>(std::numeric_limits<int8_t>::min() + static_cast<int8_t>(symmetric));
+    qmax = static_cast<T>(std::numeric_limits<int8_t>::max());
+  } else if (qnn_data_type == QNN_DATATYPE_UFIXED_POINT_8) {
+    qmin = static_cast<T>(std::numeric_limits<uint8_t>::min());
+    qmax = static_cast<T>(std::numeric_limits<uint8_t>::max());
+  } else if (qnn_data_type == QNN_DATATYPE_SFIXED_POINT_16) {
+    qmin = static_cast<T>(std::numeric_limits<int16_t>::min() + static_cast<int16_t>(symmetric));
+    qmax = static_cast<T>(std::numeric_limits<int16_t>::max());
+  } else if (qnn_data_type == QNN_DATATYPE_UFIXED_POINT_16) {
+    qmin = static_cast<T>(std::numeric_limits<uint16_t>::min());
+    qmax = static_cast<T>(std::numeric_limits<uint16_t>::max());
+  } else if (qnn_data_type == QNN_DATATYPE_SFIXED_POINT_32) {
+    qmin = static_cast<T>(std::numeric_limits<int32_t>::min() + static_cast<int32_t>(symmetric));
+    qmax = static_cast<T>(std::numeric_limits<int32_t>::max());
+  } else {
+    ORT_RETURN_IF(true, "Qnn Data Type: %d not supported yet.", qnn_data_type);
+  }
+  return Status::OK();
+}
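Note the `+ static_cast<...>(symmetric)` in the signed branches: for symmetric quantization the most negative code is dropped so the range is balanced around zero. A small standalone check of the int8 case:

```cpp
#include <cstdint>
#include <limits>

int main() {
  constexpr bool symmetric = true;
  // Mirrors the SFIXED_POINT_8 branch above: [-128, 127] becomes [-127, 127],
  // so scale * qmin == -(scale * qmax) and the zero point can stay at 0.
  constexpr int32_t qmin = std::numeric_limits<int8_t>::min() + static_cast<int32_t>(symmetric);
  constexpr int32_t qmax = std::numeric_limits<int8_t>::max();
  static_assert(qmin == -127 && qmax == 127, "symmetric int8 range is balanced");
  return 0;
}
```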
 
 template <typename T>
 inline T Saturate(const T qmax,
@@ -104,6 +138,112 @@ Status Quantize(const double double_value,
                 const Qnn_DataType_t qnn_data_type,
                 int& quant_value);
 
+size_t ShapeSizeCalc(gsl::span<const size_t> shape, size_t start, size_t end);
+
+// Computes the quantization parameters (scales and offsets) for the given data.
+// Supports both per-tensor and per-channel quantization. Must provide an axis argument
+// for per-channel quantization.
+// The offsets use the QNN convention where offset = -zero_point.
+Status GetDataQuantParams(gsl::span<const float> data, gsl::span<const size_t> shape,
+                          /*out*/ gsl::span<float> scales, /*out*/ gsl::span<int32_t> offsets,
+                          Qnn_DataType_t data_type, bool symmetric = false,
+                          std::optional<int64_t> axis = std::nullopt);
+
+// Quantizes the given float data using the provided quantization parameters (scales and offsets).
+// Supports both per-tensor and per-channel quantization. Must provide an axis argument
+// for per-channel quantization.
+// The provided offsets must use the QNN convention where offset = -zero_point.
+Status QuantizeData(gsl::span<const float> data, gsl::span<const size_t> shape,
+                    gsl::span<const float> scales, gsl::span<const int32_t> offsets,
+                    /*out*/ gsl::span<uint8_t> quant_bytes, Qnn_DataType_t data_type,
+                    std::optional<int64_t> axis = std::nullopt);
+
+// Quantizes (per-tensor) the given float data using the provided scale and offset.
+// The provided offset must use the QNN convention where offset = -zero_point.
+template <typename QuantType>
+inline Status QuantizeData(gsl::span<const float> data, float scale, int32_t offset,
+                           /*out*/ gsl::span<uint8_t> quant_bytes) {
+  const size_t num_elems = data.size();
+  const size_t expected_output_bytes = sizeof(QuantType) * num_elems;
+  ORT_RETURN_IF_NOT(expected_output_bytes == quant_bytes.size(),
+                    "Output buffer is not large enough to hold quantized bytes.");
+  const double clip_min = static_cast<double>(std::numeric_limits<QuantType>::lowest());
+  const double clip_max = static_cast<double>(std::numeric_limits<QuantType>::max());
+
+  QuantType* output = reinterpret_cast<QuantType*>(quant_bytes.data());
+  for (size_t i = 0; i < num_elems; ++i) {
+    const double scale_dbl = static_cast<double>(scale);
+    const double offset_dbl = static_cast<double>(offset);
+    double float_val = std::nearbyint(static_cast<double>(data[i]) / scale_dbl) - offset_dbl;
+    float_val = std::max(float_val, clip_min);
+    float_val = std::min(float_val, clip_max);
+    output[i] = static_cast<QuantType>(float_val);
+  }
+  return Status::OK();
+}
+
+// Re-writes a buffer of packed 4-bit elements to a buffer of unpacked 8-bit elements.
+// QNN requires that 4-bit weights are unpacked to 8-bit.
+template <bool Signed>
+Status UnpackInt4ToInt8(size_t num_int4_elems, std::vector<uint8_t>& data_bytes) {
+  if constexpr (Signed) {  // INT4
+    std::vector<uint8_t> packed_int4_bytes = std::move(data_bytes);
+    data_bytes = std::vector<uint8_t>(num_int4_elems);
+
+    auto dst = gsl::make_span(reinterpret_cast<int8_t*>(data_bytes.data()), data_bytes.size());
+    auto src = gsl::make_span(reinterpret_cast<const Int4x2*>(packed_int4_bytes.data()), packed_int4_bytes.size());
+    ORT_RETURN_IF_NOT(Int4x2::Unpack(dst, src), "Failed to unpack Tensor<Int4x2> for QNN");
+
+    // NOTE: Masking off top 4 bits to workaround a QNN INT4 accuracy bug.
+    // Docs explicitly state that masking off top 4 bits should not be required, but we have to do it.
+    for (size_t i = 0; i < dst.size(); i++) {
+      dst[i] &= 0x0F;  // -3 (0b1111_1101) becomes 13 (0b0000_1101)
+    }
+  } else {  // UINT4
+    std::vector<uint8_t> packed_uint4_bytes = std::move(data_bytes);
+    data_bytes = std::vector<uint8_t>(num_int4_elems);
+
+    auto dst = gsl::make_span(reinterpret_cast<uint8_t*>(data_bytes.data()), data_bytes.size());
+    auto src = gsl::make_span(reinterpret_cast<const UInt4x2*>(packed_uint4_bytes.data()), packed_uint4_bytes.size());
+    ORT_RETURN_IF_NOT(UInt4x2::Unpack(dst, src), "Failed to unpack Tensor<UInt4x2> for QNN");
+  }
+
+  return Status::OK();
+}
+
+template <typename T>
+std::vector<T> GetInitializerShape(const ONNX_NAMESPACE::TensorProto& tensor_proto) {
+  const auto& dims = tensor_proto.dims();
+  std::vector<T> tensor_shape_vec(static_cast<size_t>(dims.size()));
+  for (int i = 0; i < dims.size(); ++i) {
+    tensor_shape_vec[i] = static_cast<T>(dims[i]);
+  }
+
+  return tensor_shape_vec;
+}
+
+template <typename T, typename P>
+Status PermuteShape(gsl::span<const T> input_shape, gsl::span<const P> perm, gsl::span<T> output_shape) {
+  const size_t rank = input_shape.size();
+  ORT_RETURN_IF_NOT(rank == perm.size() && rank == output_shape.size(),
+                    "PermuteShape(): expect all arguments to have the same rank.");
+
+  for (size_t i = 0; i < rank; ++i) {
+    size_t p = static_cast<size_t>(perm[i]);
+    output_shape[i] = input_shape[p];
+  }
+
+  return Status::OK();
+}
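As a usage illustration for PermuteShape (hypothetical values, same output_shape[i] = input_shape[perm[i]] semantics, written without the gsl::span plumbing):

```cpp
#include <array>
#include <cstddef>
#include <cstdint>

int main() {
  const std::array<uint32_t, 4> input_shape{1, 3, 224, 224};  // NCHW
  const std::array<size_t, 4> perm{0, 2, 3, 1};               // NCHW -> NHWC
  std::array<uint32_t, 4> output_shape{};
  for (size_t i = 0; i < perm.size(); ++i) {
    output_shape[i] = input_shape[perm[i]];  // same rule PermuteShape applies
  }
  // output_shape is now {1, 224, 224, 3}
  return output_shape[3] == 3 ? 0 : 1;
}
```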
+
+// Gets error message associated with QNN error handle value.
+std::string_view GetQnnErrorMessage(const QNN_INTERFACE_VER_TYPE& qnn_interface,
+                                    Qnn_ErrorHandle_t qnn_error_handle);
+
+// Gets verbose error message associated with QNN error handle value.
+std::string GetVerboseQnnErrorMessage(const QNN_INTERFACE_VER_TYPE& qnn_interface,
+                                      Qnn_ErrorHandle_t qnn_error_handle);
+
 }  // namespace utils
 }  // namespace qnn
 }  // namespace onnxruntime
diff --git a/onnxruntime/core/providers/qnn/ort_api.cc b/onnxruntime/core/providers/qnn/ort_api.cc
new file mode 100644
index 0000000000000..809593b409dad
--- /dev/null
+++ b/onnxruntime/core/providers/qnn/ort_api.cc
@@ -0,0 +1,211 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#include "core/providers/qnn/ort_api.h"
+
+#include <algorithm>
+#include <mutex>
+#include <vector>
+
+namespace onnxruntime {
+
+#if BUILD_QNN_EP_STATIC_LIB
+static std::unique_ptr<std::vector<std::function<void()>>> s_run_on_unload_;
+
+void RunOnUnload(std::function<void()> function) {
+  static std::mutex mutex;
+  std::lock_guard<std::mutex> guard(mutex);
+  if (!s_run_on_unload_) {
+    s_run_on_unload_ = std::make_unique<std::vector<std::function<void()>>>();
+  }
+  s_run_on_unload_->push_back(std::move(function));
+}
+
+struct OnUnload {
+  ~OnUnload() {
+    if (!s_run_on_unload_)
+      return;
+
+    for (auto& function : *s_run_on_unload_)
+      function();
+
+    s_run_on_unload_.reset();
+  }
+
+} g_on_unload;
+#endif  // BUILD_QNN_EP_STATIC_LIB
+
+std::vector<const Node*> Graph__Nodes(const Graph& graph) {
+#if BUILD_QNN_EP_STATIC_LIB
+  std::vector<const Node*> nodes;
+  nodes.reserve(graph.NumberOfNodes());
+
+  for (const Node& node : graph.Nodes()) {
+    nodes.push_back(&node);
+  }
+
+  return nodes;
+#else
+  return graph.Nodes();
+#endif
+}
+
+#if BUILD_QNN_EP_STATIC_LIB
+#define NODE_ATTR_ITER_VAL(iter) (iter)->second
+#else
+#define NODE_ATTR_ITER_VAL(iter) (iter)->second()
+#endif
+
+NodeAttrHelper::NodeAttrHelper(const onnxruntime::Node& node)
+    : node_attributes_(node.GetAttributes()) {}
+
+NodeAttrHelper::NodeAttrHelper(const NodeUnit& node_unit)
+    : node_attributes_(node_unit.GetNode().GetAttributes()) {}
+
+float NodeAttrHelper::Get(const std::string& key, float def_val) const {
+  if (auto entry = node_attributes_.find(key); entry != node_attributes_.end()) {
+    return NODE_ATTR_ITER_VAL(entry).f();
+  }
+
+  return def_val;
+}
+
+int32_t NodeAttrHelper::Get(const std::string& key, int32_t def_val) const {
+  if (auto entry = node_attributes_.find(key); entry != node_attributes_.end()) {
+    return narrow<int32_t>(NODE_ATTR_ITER_VAL(entry).i());
+  }
+
+  return def_val;
+}
+
+uint32_t NodeAttrHelper::Get(const std::string& key, uint32_t def_val) const {
+  if (auto entry = node_attributes_.find(key); entry != node_attributes_.end()) {
+    return narrow<uint32_t>(NODE_ATTR_ITER_VAL(entry).i());
+  }
+
+  return def_val;
+}
+
+int64_t NodeAttrHelper::Get(const std::string& key, int64_t def_val) const {
+  if (auto entry = node_attributes_.find(key); entry != node_attributes_.end()) {
+    return NODE_ATTR_ITER_VAL(entry).i();
+  }
+
+  return def_val;
+}
+
+const std::string& NodeAttrHelper::Get(const std::string& key, const std::string& def_val) const {
+  if (auto entry = node_attributes_.find(key); entry != node_attributes_.end()) {
+    return NODE_ATTR_ITER_VAL(entry).s();
+  }
+
+  return def_val;
+}
+
+std::vector<int32_t> NodeAttrHelper::Get(const std::string& key, const std::vector<int32_t>& def_val) const {
+  if (auto entry = node_attributes_.find(key); entry != node_attributes_.end()) {
+    const auto& values = NODE_ATTR_ITER_VAL(entry).ints();
+    const int64_t* cbegin = values.data();
+    const int64_t* cend = values.data() + values.size();
+    std::vector<int32_t> v;
+    v.reserve(static_cast<size_t>(values.size()));
+    std::transform(cbegin, cend, std::back_inserter(v),
+                   [](int64_t val) -> int32_t { return narrow<int32_t>(val); });
+    return v;
+  }
+
+  return def_val;
+}
+
+std::vector<uint32_t> NodeAttrHelper::Get(const std::string& key, const std::vector<uint32_t>& def_val) const {
+  if (auto entry = node_attributes_.find(key); entry != node_attributes_.end()) {
+    const auto& values = NODE_ATTR_ITER_VAL(entry).ints();
+    const int64_t* cbegin = values.data();
+    const int64_t* cend = values.data() + values.size();
+    std::vector<uint32_t> v;
+    v.reserve(static_cast<size_t>(values.size()));
+    std::transform(cbegin, cend, std::back_inserter(v),
+                   [](int64_t val) -> uint32_t { return narrow<uint32_t>(val); });
+    return v;
+  }
+
+  return def_val;
+}
+
+std::vector<int64_t> NodeAttrHelper::Get(const std::string& key, const std::vector<int64_t>& def_val) const {
+  if (auto entry = node_attributes_.find(key); entry != node_attributes_.end()) {
+    const auto& values = NODE_ATTR_ITER_VAL(entry).ints();
+    const int64_t* cbegin = values.data();
+    const int64_t* cend = values.data() + values.size();
+    return std::vector<int64_t>{cbegin, cend};
+  }
+
+  return def_val;
+}
+
+std::vector<float> NodeAttrHelper::Get(const std::string& key, const std::vector<float>& def_val) const {
+  if (auto entry = node_attributes_.find(key); entry != node_attributes_.end()) {
+    const auto& values = NODE_ATTR_ITER_VAL(entry).floats();
+    const float* cbegin = values.data();
+    const float* cend = values.data() + values.size();
+    return std::vector<float>{cbegin, cend};
+  }
+
+  return def_val;
+}
+
+std::optional<float> NodeAttrHelper::GetFloat(const std::string& key) const {
+  std::optional<float> result;
+  if (auto entry = node_attributes_.find(key); entry != node_attributes_.end()) {
+    result = NODE_ATTR_ITER_VAL(entry).f();
+  }
+
+  return result;
+}
+
+std::optional<int64_t> NodeAttrHelper::GetInt64(const std::string& key) const {
+  std::optional<int64_t> result;
+  if (auto entry = node_attributes_.find(key); entry != node_attributes_.end()) {
+    result = NODE_ATTR_ITER_VAL(entry).i();
+  }
+
+  return result;
+}
+
+std::optional<std::vector<float>> NodeAttrHelper::GetFloats(const std::string& key) const {
+  std::optional<std::vector<float>> result;
+  if (auto entry = node_attributes_.find(key); entry != node_attributes_.end()) {
+    const auto& values = NODE_ATTR_ITER_VAL(entry).floats();
+    const float* cbegin = values.data();
+    const float* cend = values.data() + values.size();
+    result = std::vector<float>(cbegin, cend);
+  }
+
+  return result;
+}
+
+std::optional<std::vector<int64_t>> NodeAttrHelper::GetInt64s(const std::string& key) const {
+  std::optional<std::vector<int64_t>> result;
+  if (auto entry = node_attributes_.find(key); entry != node_attributes_.end()) {
+    const auto& values = NODE_ATTR_ITER_VAL(entry).ints();
+    const int64_t* cbegin = values.data();
+    const int64_t* cend = values.data() + values.size();
+    result = std::vector<int64_t>(cbegin, cend);
+  }
+
+  return result;
+}
+
+std::optional<std::string> NodeAttrHelper::GetString(const std::string& key) const {
+  std::optional<std::string> result;
+  if (auto entry = node_attributes_.find(key); entry != node_attributes_.end()) {
+    result = NODE_ATTR_ITER_VAL(entry).s();
+  }
+
+  return result;
+}
+
+bool NodeAttrHelper::HasAttr(const std::string& key) const {
+  return node_attributes_.find(key) != node_attributes_.end();
+}
+}  // namespace onnxruntime
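To make the getter overloads above concrete, here is a hypothetical caller reading ONNX Conv attributes; the attribute names come from the ONNX operator spec, and the snippet assumes ort_api.h is on the include path:

```cpp
#include <cstdint>
#include <optional>
#include <string>
#include <vector>

#include "core/providers/qnn/ort_api.h"

void ReadConvAttributes(const onnxruntime::Node& conv_node) {
  onnxruntime::NodeAttrHelper helper{conv_node};

  // Scalar lookups fall back to the supplied default when the attribute is absent.
  const int64_t group = helper.Get("group", static_cast<int64_t>(1));

  // ints() attributes can be narrowed to int32_t in one call.
  const std::vector<int32_t> strides = helper.Get("strides", std::vector<int32_t>{1, 1});

  // The Get*() variants return std::nullopt instead of a default.
  const std::optional<std::string> auto_pad = helper.GetString("auto_pad");

  (void)group;
  (void)strides;
  (void)auto_pad;
}
```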
diff --git a/onnxruntime/core/providers/qnn/ort_api.h b/onnxruntime/core/providers/qnn/ort_api.h
new file mode 100644
index 0000000000000..030ebbb54c615
--- /dev/null
+++ b/onnxruntime/core/providers/qnn/ort_api.h
@@ -0,0 +1,178 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License
+
+#pragma once
+
+// This compilation unit (ort_api.h/.cc) encapsulates the interface between the EP and ORT in a manner
+// that allows QNN EP to be built either as a static library or a dynamic shared library.
+// The preprocessor macro `BUILD_QNN_EP_STATIC_LIB` is defined and set to 1 if QNN EP
+// is built as a static library.
+
+#if BUILD_QNN_EP_STATIC_LIB
+// Includes when building QNN EP statically
+#ifdef _WIN32
+#include <Windows.h>
+#include <TraceLoggingProvider.h>
+#include "core/platform/tracing.h"
+#include "core/platform/windows/logging/etw_sink.h"
+#endif
+
+#include "onnx/defs/data_type_utils.h"
+#include "core/common/common.h"
+#include "core/common/status.h"
+#include "core/common/safeint.h"
+#include "core/common/logging/logging.h"
+#include "core/common/logging/capture.h"
+#include "core/common/path_string.h"
+#include "core/platform/env.h"
+#include "core/framework/data_types.h"
+#include "core/framework/float16.h"
+#include "core/framework/run_options.h"
+#include "core/framework/execution_provider.h"
+#include "core/framework/model_metadef_id_generator.h"
+#include "core/framework/compute_capability.h"
+#include "core/framework/tensor_shape.h"
+#include "core/framework/node_unit.h"
+#include "core/framework/tensorprotoutils.h"
+#include "core/framework/utils.h"
+#include "core/graph/constants.h"
+#include "core/graph/basic_types.h"
+#include "core/graph/model.h"
+#include "core/graph/graph_viewer.h"
+#include "core/optimizer/qdq_transformer/selectors_actions/qdq_selectors.h"
+#include "core/optimizer/qdq_transformer/selectors_actions/shared/utils.h"
+#include "core/providers/common.h"
+#include "core/providers/partitioning_utils.h"
+#include "core/session/onnxruntime_cxx_api.h"
+#else
+// Includes when building QNN EP as a shared library
+#include "core/providers/shared_library/provider_api.h"
+#define ORT_API_MANUAL_INIT
+#include "core/session/onnxruntime_cxx_api.h"
+#endif
+
+#include "core/common/inlined_containers.h"
+#include "core/session/onnxruntime_session_options_config_keys.h"
+#include "core/session/onnxruntime_run_options_config_keys.h"
+
+#include <functional>
+#include <optional>
+
+namespace onnxruntime {
+#if BUILD_QNN_EP_STATIC_LIB
+using Node_EdgeEnd = Node::EdgeEnd;
+#endif
+
+#if BUILD_QNN_EP_STATIC_LIB
+void RunOnUnload(std::function<void()> function);
+inline const Env& GetDefaultEnv() { return Env::Default(); }
+#endif
+
+inline void InitOrtCppApi() {
+#if BUILD_QNN_EP_STATIC_LIB
+  // Do nothing. Including "onnxruntime_cxx_api.h" normally initializes the global api_ object.
+#else
+  // Call util function in provider bridge that initializes the global api_ object.
+  InitProviderOrtApi();
+#endif
+}
+
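One implication of ORT_API_MANUAL_INIT in the shared-library branch: nothing may touch the Ort C++ API before InitOrtCppApi() has run. A sketch of the intended call order (CreateQnnExecutionProvider is a hypothetical entry point, not part of this PR):

```cpp
#include <memory>

#include "core/providers/qnn/ort_api.h"

std::unique_ptr<onnxruntime::IExecutionProvider> CreateQnnExecutionProvider() {
  // In the shared-library build this sets the global Ort API pointer via
  // InitProviderOrtApi(); in the static build it is a no-op.
  onnxruntime::InitOrtCppApi();

  // ... only now is it safe to construct objects that use the Ort C++ API ...
  return nullptr;  // placeholder for illustration
}
```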

+/// <summary>
+/// Creates an onnxruntime or onnx object. Works for both static and shared library builds of QNN EP.
+///
+/// Example: auto model = Factory<Model>::Create(/* args ... */);
+/// </summary>
+/// <typeparam name="T">Type of the object to create</typeparam>
+template <typename T>
+struct Factory {
+  template <typename... Params>
+  static inline std::unique_ptr<T> Create(Params&&... params) {
+#if BUILD_QNN_EP_STATIC_LIB
+    return std::make_unique<T>(std::forward<Params>(params)...);
+#else
+    return T::Create(std::forward<Params>(params)...);
+#endif
+  }
+};
+
+inline const ConfigOptions& RunOptions__GetConfigOptions(const RunOptions& run_options) {
+#if BUILD_QNN_EP_STATIC_LIB
+  return run_options.config_options;
+#else
+  return run_options.GetConfigOptions();
+#endif
+}
+
+inline std::unique_ptr<IndexedSubGraph>& ComputeCapability__SubGraph(ComputeCapability& compute_cability) {
+#if BUILD_QNN_EP_STATIC_LIB
+  return compute_cability.sub_graph;
+#else
+  return compute_cability.SubGraph();
+#endif
+}
+
+inline std::vector<onnxruntime::NodeIndex>& IndexedSubGraph__Nodes(IndexedSubGraph& indexed_sub_graph) {
+#if BUILD_QNN_EP_STATIC_LIB
+  return indexed_sub_graph.nodes;
+#else
+  return indexed_sub_graph.Nodes();
+#endif
+}
+
+std::vector<const Node*> Graph__Nodes(const Graph& graph);
+
+inline std::pair<std::vector<std::unique_ptr<NodeUnit>>, std::unordered_map<const Node*, const NodeUnit*>>
+GetQDQNodeUnits(const GraphViewer& graph_viewer, const logging::Logger& logger) {
+#if BUILD_QNN_EP_STATIC_LIB
+  return QDQ::GetAllNodeUnits(graph_viewer, logger);
+#else
+  return QDQ::GetAllNodeUnits(&graph_viewer, logger);
+#endif
+}
+
+/**
+ * Wrapping onnxruntime::Node for retrieving attribute values
+ */
+class NodeAttrHelper {
+ public:
+  explicit NodeAttrHelper(const Node& node);
+
+  // Get the attributes from the target node of the node_unit
+  explicit NodeAttrHelper(const NodeUnit& node_unit);
+
+  /*
+   * Get with default
+   */
+  float Get(const std::string& key, float def_val) const;
+  std::vector<float> Get(const std::string& key, const std::vector<float>& def_val) const;
+
+  int64_t Get(const std::string& key, int64_t def_val) const;
+  std::vector<int64_t> Get(const std::string& key, const std::vector<int64_t>& def_val) const;
+
+  const std::string& Get(const std::string& key, const std::string& def_val) const;
+
+  // Convert the i() or ints() of the attribute from int64_t to int32_t
+  int32_t Get(const std::string& key, int32_t def_val) const;
+  std::vector<int32_t> Get(const std::string& key, const std::vector<int32_t>& def_val) const;
+
+  // Convert the i() or ints() of the attribute from int64_t to uint32_t
+  uint32_t Get(const std::string& key, uint32_t def_val) const;
+  std::vector<uint32_t> Get(const std::string& key, const std::vector<uint32_t>& def_val) const;
+
+  /*
+   * Get without default.
+   */
+  std::optional<float> GetFloat(const std::string& key) const;
+  std::optional<std::vector<float>> GetFloats(const std::string& key) const;
+
+  std::optional<int64_t> GetInt64(const std::string& key) const;
+  std::optional<std::vector<int64_t>> GetInt64s(const std::string& key) const;
+
+  std::optional<std::string> GetString(const std::string& key) const;
+
+  bool HasAttr(const std::string& key) const;
+
+ private:
+  const NodeAttributes& node_attributes_;
+};
+}  // namespace onnxruntime
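The Factory<T> indirection above keeps a single call site working in both builds: std::make_unique needs the complete ORT type, which only the static build links against, while T::Create() is the provider-bridge entry point in the shared build. The PR itself uses it this way for the EP context model (see the qnn_ep_context_model_ construction further down in this diff):

```cpp
#include <memory>
#include <string>

#include "core/providers/qnn/ort_api.h"

std::unique_ptr<onnxruntime::Model> MakeEpContextModel(const onnxruntime::logging::Logger& logger) {
  // Static build: std::make_unique<Model>(...). Shared build: Model::Create(...).
  return onnxruntime::Factory<onnxruntime::Model>::Create(
      std::string{"qnn_ep_context_model"}, false, logger);
}
```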
diff --git a/onnxruntime/core/providers/qnn/qnn_allocator.cc b/onnxruntime/core/providers/qnn/qnn_allocator.cc
new file mode 100644
index 0000000000000..1fb8742f724cd
--- /dev/null
+++ b/onnxruntime/core/providers/qnn/qnn_allocator.cc
@@ -0,0 +1,244 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#include "core/providers/qnn/qnn_allocator.h"
+
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
+
+#include "core/providers/qnn/ort_api.h"
+
+namespace onnxruntime::qnn {
+
+/**
+ * HtpSharedMemoryAllocator allocation details
+ *
+ * The HTP shared memory allocator will allocate a block of shared memory larger than the amount requested in order to
+ * hold some additional info.
+ * Each allocation returned by HtpSharedMemoryAllocator::Alloc() is preceded by an AllocationHeader structure.
+ *
+ * For example, if Alloc(num_requested_bytes) is called, this is what the memory layout looks like:
+ *   | AllocationHeader bytes | num_requested_bytes bytes |
+ *                            ^- address returned by Alloc()
+ *
+ * The AllocationHeader can be used to obtain the owning allocator instance, which in turn can be used to do other
+ * operations with that allocation, such as retrieving more info about the allocation.
+ */
+
+namespace {
+
+struct AllocationHeader {
+  static constexpr std::array<char, 8> kAllocationHeaderMarker{'o', 'r', 't', 'a', 'l', 'l', 'o', 'c'};
+
+  // Marker bytes to verify as a sanity check.
+  std::array<char, 8> marker;
+
+  // Pointer to the allocating allocator instance.
+  // Note: A critical assumption here is that the allocating allocator is not destroyed before the allocation is freed.
+  HtpSharedMemoryAllocator* allocator_ptr;
+
+  AllocationHeader(HtpSharedMemoryAllocator* allocator_ptr)
+      : marker{kAllocationHeaderMarker},
+        allocator_ptr{allocator_ptr} {
+  }
+
+  ~AllocationHeader() {
+    marker.fill('\0');
+    allocator_ptr = nullptr;
+  }
+};
+
+size_t AllocationAlignment() {
+  constexpr size_t min_allocation_alignment = 64;  // Equal to MlasGetPreferredBufferAlignment()
+  return std::max(alignof(AllocationHeader), min_allocation_alignment);
+}
+
+size_t DivRoundUp(size_t a, size_t b) {  // TODO is there already a helper function somewhere for this?
+  return (a + b - 1) / b;
+}
+
+bool IsAligned(const void* address, size_t alignment) {
+  assert((alignment & (alignment - 1)) == 0);  // alignment must be a power of two
+  return (reinterpret_cast<uintptr_t>(address) & (alignment - 1)) == 0;
+}
+
+size_t AllocationOffsetFromStartOfHeader() {
+  const size_t allocation_alignment = AllocationAlignment();
+  const size_t offset = DivRoundUp(sizeof(AllocationHeader), allocation_alignment) * allocation_alignment;
+  return offset;
+}
+
+std::byte* GetAllocationHeaderAddress(void* allocation_address) {
+  auto* allocation_header_address = reinterpret_cast<std::byte*>(allocation_address) - sizeof(AllocationHeader);
+  return allocation_header_address;
+}
+
+AllocationHeader& ValidateAllocationAddressAndGetHeader(void* allocation_address) {
+  const size_t allocation_alignment = AllocationAlignment();
+  ORT_ENFORCE(IsAligned(allocation_address, allocation_alignment),
+              "Allocation address (", allocation_address, ") does not have required alignment (",
+              allocation_alignment, " bytes).");
+
+  auto* allocation_header = reinterpret_cast<AllocationHeader*>(GetAllocationHeaderAddress(allocation_address));
+  ORT_ENFORCE(allocation_header->marker == AllocationHeader::kAllocationHeaderMarker,
+              "AllocationHeader for allocation address (", allocation_address,
+              ") does not have the expected marker bytes.");
+
+  return *allocation_header;
+}
+
+std::unique_ptr<void, void (*)(void*)> WrapSharedMemoryWithUniquePtr(void* shared_memory_raw,
+                                                                     const RpcMemApi& rpcmem_api) {
+  return {shared_memory_raw, rpcmem_api.free};
+}
+
+}  // namespace
+
+OrtMemoryInfo HtpSharedMemoryAllocator::AssociatedMemoryInfo() {
+  return OrtMemoryInfo{QNN_HTP_SHARED, OrtAllocatorType::OrtDeviceAllocator,
+                       OrtDevice{OrtDevice::CPU, OrtDevice::MemType::QNN_HTP_SHARED, /* device_id */ 0},
+                       /* id */ 0, OrtMemTypeDefault};
+}
+
+HtpSharedMemoryAllocator::HtpSharedMemoryAllocator(std::shared_ptr<RpcMemLibrary> rpcmem_lib,
+                                                   const logging::Logger* logger)
+    : IAllocator{AssociatedMemoryInfo()},
+      rpcmem_lib_{std::move(rpcmem_lib)},
+      logger_(logger != nullptr ? *logger : logging::LoggingManager::DefaultLogger()) {
+  ORT_ENFORCE(rpcmem_lib_ != nullptr);
+}
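Worked through with concrete numbers (assuming a 16-byte AllocationHeader, i.e. the 8 marker bytes plus an 8-byte pointer, and the 64-byte minimum alignment above): the header is rounded up to one full 64-byte slot, so every Alloc(n) requests 64 + n bytes from rpcmem and hands the caller base + 64.

```cpp
#include <cstddef>

constexpr size_t DivRoundUpSketch(size_t a, size_t b) { return (a + b - 1) / b; }

// Assumed sizes: 8 marker bytes + 8-byte allocator pointer, 64-byte alignment.
constexpr size_t kAssumedHeaderSize = 16;
constexpr size_t kAlignment = 64;

static_assert(DivRoundUpSketch(kAssumedHeaderSize, kAlignment) * kAlignment == 64,
              "the header occupies exactly one aligned slot before the user allocation");
```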
+
+void* HtpSharedMemoryAllocator::Alloc(size_t requested_size) {
+  const size_t allocation_offset = AllocationOffsetFromStartOfHeader();
+  const size_t shared_memory_block_size_in_bytes = allocation_offset + requested_size;
+
+  // rpcmem_alloc() has an int size parameter. make sure we don't overflow.
+  // TODO switch to rpcmem_alloc2() which has size_t size parameter.
+  // need to verify that rpcmem_alloc2() is available in all environments we care about.
+  const SafeInt<int> shared_memory_block_size_in_bytes_int = shared_memory_block_size_in_bytes;
+
+  // allocate shared memory
+  void* shared_memory_raw = rpcmem_lib_->Api().alloc(rpcmem::RPCMEM_HEAP_ID_SYSTEM, rpcmem::RPCMEM_DEFAULT_FLAGS,
+                                                     shared_memory_block_size_in_bytes_int);
+  ORT_ENFORCE(shared_memory_raw != nullptr, "rpcmem_alloc() failed to allocate and returned nullptr.");
+  auto shared_memory = WrapSharedMemoryWithUniquePtr(shared_memory_raw, rpcmem_lib_->Api());
+
+  const size_t allocation_alignment = AllocationAlignment();
+  ORT_ENFORCE(IsAligned(shared_memory_raw, allocation_alignment),
+              "Shared memory address (", shared_memory_raw, ") does not have required alignment (",
+              allocation_alignment, " bytes).");
+
+  // get shared memory fd
+  const auto shared_memory_fd = rpcmem_lib_->Api().to_fd(shared_memory.get());
+  ORT_ENFORCE(shared_memory_fd != -1, "rpcmem_to_fd() returned invalid file descriptor.");
+
+  std::byte* allocation_address = reinterpret_cast<std::byte*>(shared_memory_raw) + allocation_offset;
+
+  // store allocation record
+  {
+    SharedMemoryInfo shared_memory_info{};
+    shared_memory_info.fd = shared_memory_fd;
+    shared_memory_info.offset = allocation_offset;
+    shared_memory_info.total_size = shared_memory_block_size_in_bytes;
+
+    AllocationRecord allocation_record{};
+    allocation_record.shared_memory_info = std::move(shared_memory_info);
+
+    std::scoped_lock g{allocations_mutex_};
+    const bool inserted = allocations_.emplace(allocation_address, std::move(allocation_record)).second;
+    ORT_ENFORCE(inserted, "Allocation record already exists for address (", allocation_address, ").");
+  }
+
+  // initialize header
+  {
+    std::byte* allocation_header_address = GetAllocationHeaderAddress(allocation_address);
+    new (allocation_header_address) AllocationHeader(this);
+  }
+
+  shared_memory.release();
+  return allocation_address;
+}
+
+void HtpSharedMemoryAllocator::Free(void* allocation_address) {
+  if (allocation_address == nullptr) {
+    return;
+  }
+
+  auto& allocation_header = ValidateAllocationAddressAndGetHeader(allocation_address);
+  ORT_ENFORCE(allocation_header.allocator_ptr == this,
+              "AllocationHeader points to a different allocator (", allocation_header.allocator_ptr,
+              ") than this one (", this, ").");
+
+  const auto allocation_node = [this, allocation_address]() {
+    std::scoped_lock g{allocations_mutex_};
+    return allocations_.extract(allocation_address);
+  }();
+
+  ORT_ENFORCE(!allocation_node.empty(), "Failed to get allocation info for address (", allocation_address, ").");
+
+  // At this point, we have a valid allocation to free.
+  // Avoid throwing exceptions as this may be running from a destructor.
+  try {
+    // take ownership of shared memory and free at end of scope
+    auto shared_memory = WrapSharedMemoryWithUniquePtr(allocation_address, rpcmem_lib_->Api());
+
+    // destroy header
+    allocation_header.~AllocationHeader();
+
+    // clean up allocation record
+    const auto& allocation_record = allocation_node.mapped();
+    for (auto& clean_up_fn : allocation_record.clean_up_fns) {
+      // attempt to run each clean_up_fn even if exceptions are thrown
+      try {
+        clean_up_fn(allocation_address);
+      } catch (const std::exception& e) {
+        LOGS(logger_, ERROR) << "Caught exception while running clean up callback for address (" << allocation_address
+                             << "): " << e.what();
+      }
+    }
+  } catch (const std::exception& e) {
+    LOGS(logger_, ERROR) << "Caught exception while freeing address (" << allocation_address << "): " << e.what();
+  }
+}
+
+Status HtpSharedMemoryAllocator::GetAllocationSharedMemoryInfo(void* allocation_address,
+                                                               SharedMemoryInfo& allocation_info) {
+  auto& allocation_header = ValidateAllocationAddressAndGetHeader(allocation_address);
+  return allocation_header.allocator_ptr->GetAllocationSharedMemoryInfoForThisAllocator(allocation_address,
+                                                                                        allocation_info);
+}
+
+Status HtpSharedMemoryAllocator::AddAllocationCleanUp(void* allocation_address,
+                                                      AllocationCleanUpFn&& allocation_clean_up) {
+  auto& allocation_header = ValidateAllocationAddressAndGetHeader(allocation_address);
+  return allocation_header.allocator_ptr->AddAllocationCleanUpForThisAllocator(allocation_address,
+                                                                               std::move(allocation_clean_up));
+}
+
+Status HtpSharedMemoryAllocator::GetAllocationSharedMemoryInfoForThisAllocator(void* allocation_address,
+                                                                               SharedMemoryInfo& allocation_info) {
+  std::scoped_lock g{allocations_mutex_};
+  const auto allocation_it = allocations_.find(allocation_address);
+  ORT_RETURN_IF(allocation_it == allocations_.end(),
+                "Failed to get allocation info for address (", allocation_address, ").");
+
+  allocation_info = allocation_it->second.shared_memory_info;
+  return Status::OK();
+}
+
+Status HtpSharedMemoryAllocator::AddAllocationCleanUpForThisAllocator(void* allocation_address,
+                                                                      AllocationCleanUpFn&& allocation_clean_up) {
+  ORT_RETURN_IF(allocation_clean_up == nullptr, "allocation_clean_up should not be empty.");
+
+  std::scoped_lock g{allocations_mutex_};
+  const auto allocation_it = allocations_.find(allocation_address);
+  ORT_RETURN_IF(allocation_it == allocations_.end(),
+                "Failed to get allocation info for address (", allocation_address, ").");
+
+  auto& clean_up_fns = allocation_it->second.clean_up_fns;
+  clean_up_fns.emplace_back(std::move(allocation_clean_up));
+  return Status::OK();
+}
+
+}  // namespace onnxruntime::qnn
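A hedged sketch of how the two static helpers above are meant to be used together by code that registers HTP shared memory with QNN (the registration/deregistration calls themselves are elided; htp_buffer is assumed to be a live pointer obtained from Alloc()):

```cpp
#include "core/providers/qnn/qnn_allocator.h"

onnxruntime::Status RegisterWithQnn(void* htp_buffer) {
  using Allocator = onnxruntime::qnn::HtpSharedMemoryAllocator;

  Allocator::SharedMemoryInfo info{};
  ORT_RETURN_IF_ERROR(Allocator::GetAllocationSharedMemoryInfo(htp_buffer, info));
  // info.fd and info.offset describe where this allocation lives inside the
  // underlying rpcmem block; hand them to QNN's memory registration here.

  // Undo the registration automatically when the allocation is freed.
  return Allocator::AddAllocationCleanUp(htp_buffer, [](void* address) {
    // deregister `address` with QNN here
    (void)address;
  });
}
```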
diff --git a/onnxruntime/core/providers/qnn/qnn_allocator.h b/onnxruntime/core/providers/qnn/qnn_allocator.h
new file mode 100644
index 0000000000000..e64f38f494b35
--- /dev/null
+++ b/onnxruntime/core/providers/qnn/qnn_allocator.h
@@ -0,0 +1,70 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#pragma once
+
+#include <functional>
+#include <mutex>
+
+#include "core/providers/qnn/ort_api.h"
+#include "core/providers/qnn/rpcmem_library.h"
+
+namespace onnxruntime::qnn {
+
+class HtpSharedMemoryAllocator : public IAllocator {
+ public:
+  // Gets the OrtMemoryInfo value that is associated with this allocator type.
+  static OrtMemoryInfo AssociatedMemoryInfo();
+
+  HtpSharedMemoryAllocator(std::shared_ptr<RpcMemLibrary> rpcmem_lib,
+                           const logging::Logger* logger = nullptr);
+
+  ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(HtpSharedMemoryAllocator);
+
+  // IAllocator overrides
+
+  void* Alloc(size_t size) override;
+  void Free(void* p) override;
+  // void GetStats(AllocatorStats* stats) override; // TODO override
+
+  struct SharedMemoryInfo {
+    int fd;
+    uint64_t offset;
+    uint64_t total_size;
+  };
+
+  // Gets an allocation's shared memory info.
+  // `allocation_address` identifies the allocation. It must be an address returned by Alloc() which has not yet been
+  // freed.
+  static Status GetAllocationSharedMemoryInfo(void* allocation_address,
+                                              SharedMemoryInfo& allocation_info);
+
+  using AllocationCleanUpFn = std::function<void(void* allocation_address)>;
+
+  // Adds allocation clean up callback to call when the allocation is freed.
+  // `allocation_address` identifies the allocation. It must be an address returned by Alloc() which has not yet been
+  // freed.
+  // `allocation_clean_up` is the clean up callback. The associated allocator takes ownership of the callback.
+  static Status AddAllocationCleanUp(void* allocation_address, AllocationCleanUpFn&& allocation_clean_up);
+
+ private:
+  Status GetAllocationSharedMemoryInfoForThisAllocator(void* allocation_address,
+                                                       SharedMemoryInfo& allocation_info);
+
+  Status AddAllocationCleanUpForThisAllocator(void* allocation_address, AllocationCleanUpFn&& allocation_clean_up);
+
+  struct AllocationRecord {
+    SharedMemoryInfo shared_memory_info;
+    InlinedVector<AllocationCleanUpFn> clean_up_fns;
+  };
+
+  // allocation address -> corresponding allocation record
+  InlinedHashMap<void*, AllocationRecord> allocations_;
+  std::mutex allocations_mutex_;  // synchronize access to allocations_
+
+  std::shared_ptr<RpcMemLibrary> rpcmem_lib_;
+
+  const logging::Logger& logger_;
+};
+
+}  // namespace onnxruntime::qnn
diff --git a/onnxruntime/core/providers/qnn/qnn_execution_provider.cc b/onnxruntime/core/providers/qnn/qnn_execution_provider.cc
index 1d9242f8a5939..b1555b6050928 100644
--- a/onnxruntime/core/providers/qnn/qnn_execution_provider.cc
+++ b/onnxruntime/core/providers/qnn/qnn_execution_provider.cc
@@ -5,58 +5,23 @@
 #include <filesystem>
 #include <string>
 
-#include "core/framework/compute_capability.h"
-#include "core/graph/graph_viewer.h"
-#include "core/session/onnxruntime_session_options_config_keys.h"
-#include "core/session/onnxruntime_run_options_config_keys.h"
-#include "core/session/onnxruntime_cxx_api.h"
-#include "core/framework/kernel_registry.h"
-#include "core/optimizer/qdq_transformer/selectors_actions/qdq_selectors.h"
-#include "core/optimizer/qdq_transformer/selectors_actions/shared/utils.h"
-#include "core/platform/env.h"
-#include "core/providers/common.h"
-#include "core/providers/partitioning_utils.h"
-#include "core/providers/partitioning_utils.h"
-#include "core/providers/qnn/builder/qnn_model_wrapper.h"
+
+#include "core/providers/qnn/ort_api.h"
+#include "core/providers/qnn/builder/onnx_ctx_model_helper.h"
 #include "core/providers/qnn/builder/op_builder_factory.h"
-#include "core/providers/qnn/builder/qnn_node_group.h"
 #include "core/providers/qnn/builder/qnn_def.h"
-#include "core/providers/qnn/builder/onnx_ctx_model_helper.h"
-#include "core/framework/run_options.h"
-
-#ifdef _WIN32
-#include <winmeta.h>
-#include "core/platform/windows/logging/etw_sink.h"
-#endif
+#include "core/providers/qnn/builder/qnn_model_wrapper.h"
+#include "core/providers/qnn/builder/qnn_node_group.h"
+#include "core/providers/qnn/builder/qnn_utils.h"
+#include "core/providers/qnn/qnn_allocator.h"
+#include "core/providers/qnn/qnn_telemetry.h" +#include "core/providers/qnn/rpcmem_library.h" +#include "core/providers/qnn/shared_context.h" namespace onnxruntime { constexpr const char* QNN = "QNN"; -static std::unique_ptr>> s_run_on_unload_; - -void RunOnUnload(std::function function) { - static std::mutex mutex; - std::lock_guard guard(mutex); - if (!s_run_on_unload_) { - s_run_on_unload_ = std::make_unique>>(); - } - s_run_on_unload_->push_back(std::move(function)); -} - -struct OnUnload { - ~OnUnload() { - if (!s_run_on_unload_) - return; - - for (auto& function : *s_run_on_unload_) - function(); - - s_run_on_unload_.reset(); - } - -} g_on_unload; - static void ParseProfilingLevel(std::string profiling_level_string, qnn::ProfilingLevel& profiling_level) { std::transform(profiling_level_string.begin(), @@ -193,17 +158,20 @@ qnn::ProfilingLevel QNNExecutionProvider::GetProfilingLevelFromETWLevel(unsigned } QNNExecutionProvider::QNNExecutionProvider(const ProviderOptions& provider_options_map, - const SessionOptions* session_options) + const ConfigOptions* config_options) : IExecutionProvider{onnxruntime::kQnnExecutionProvider} { - if (session_options) { - disable_cpu_ep_fallback_ = session_options->config_options.GetConfigOrDefault( + InitOrtCppApi(); + metadef_id_generator_ = Factory::Create(); + + if (config_options) { + disable_cpu_ep_fallback_ = config_options->GetConfigOrDefault( kOrtSessionOptionsDisableCPUEPFallback, "0") == "1"; - context_cache_enabled_ = session_options->config_options.GetConfigOrDefault( + context_cache_enabled_ = config_options->GetConfigOrDefault( kOrtSessionOptionEpContextEnable, "0") == "1"; LOGS_DEFAULT(VERBOSE) << "Context cache enable: " << context_cache_enabled_; - std::string embed_mode = session_options->config_options.GetConfigOrDefault( + std::string embed_mode = config_options->GetConfigOrDefault( kOrtSessionOptionEpContextEmbedMode, "0"); if ("1" == embed_mode) { qnn_context_embed_mode_ = true; @@ -214,18 +182,18 @@ QNNExecutionProvider::QNNExecutionProvider(const ProviderOptions& provider_optio } LOGS_DEFAULT(VERBOSE) << "User specified context cache embed mode: " << qnn_context_embed_mode_; - context_cache_path_cfg_ = session_options->config_options.GetConfigOrDefault(kOrtSessionOptionEpContextFilePath, ""); + context_cache_path_cfg_ = config_options->GetConfigOrDefault(kOrtSessionOptionEpContextFilePath, ""); LOGS_DEFAULT(VERBOSE) << "User specified context cache path: " << context_cache_path_cfg_; // For the case that workaround QNN context PD memory limit, user need split the model into pieces and // generate the QNN context model separately. // It could happen that the generated EPContext node in separate graph has same node name. // User can set this context_node_name_prefix for each split pieces to avoid that happens. 
- context_node_name_prefix_ = session_options->config_options.GetConfigOrDefault(kOrtSessionOptionEpContextNodeNamePrefix, ""); + context_node_name_prefix_ = config_options->GetConfigOrDefault(kOrtSessionOptionEpContextNodeNamePrefix, ""); LOGS_DEFAULT(VERBOSE) << "User specified QNN context node name prefix: " << context_node_name_prefix_; share_ep_contexts_ = - session_options->config_options.GetConfigOrDefault(kOrtSessionOptionShareEpContexts, "0") == "1"; + config_options->GetConfigOrDefault(kOrtSessionOptionShareEpContexts, "0") == "1"; LOGS_DEFAULT(VERBOSE) << "User specified option - share EP contexts across sessions: " << share_ep_contexts_; } @@ -246,8 +214,9 @@ QNNExecutionProvider::QNNExecutionProvider(const ProviderOptions& provider_optio // separate out the profiling level for ETW in case it gets disabled later when we extract the events // set to invalid to indicate that ETW is no enabled when we setup QNN qnn::ProfilingLevel profiling_level_etw = qnn::ProfilingLevel::INVALID; - const Env& env = Env::Default(); - auto& provider = env.GetTelemetryProvider(); + +#ifdef _WIN32 + auto& provider = qnn::QnnTelemetry::Instance(); if (provider.IsEnabled()) { auto level = provider.Level(); auto keyword = provider.Keyword(); @@ -257,6 +226,7 @@ QNNExecutionProvider::QNNExecutionProvider(const ProviderOptions& provider_optio } } } +#endif // defined(_WIN32) // In case ETW gets disabled later auto profiling_level_pos = provider_options_map.find(PROFILING_LEVEL); @@ -390,59 +360,72 @@ QNNExecutionProvider::QNNExecutionProvider(const ProviderOptions& provider_optio << "handles the graph I/O quantization/dequantization."; } - qnn_backend_manager_ = std::make_unique( - std::move(backend_path), - profiling_level_etw, - profiling_level, - std::move(profiling_file_path), - context_priority, - std::move(qnn_saver_path), - device_id_, - htp_arch, - soc_model, - enable_htp_weight_sharing); + static const std::string QNN_HTP_SHARED_MEMORY_ALLOCATOR_ENABLED = "enable_htp_shared_memory_allocator"; + if (ParseBoolOption(QNN_HTP_SHARED_MEMORY_ALLOCATOR_ENABLED, false, provider_options_map)) { + // Initialize rpcmem_library_. + // This is necessary for HtpSharedMemoryAllocator to function and also indicates that the allocator is available. + rpcmem_library_ = std::make_shared(); + } -#ifdef _WIN32 - auto& etwRegistrationManager = logging::EtwRegistrationManager::Instance(); - // Register callback for ETW capture state (rundown) - callback_ETWSink_provider_ = onnxruntime::logging::EtwRegistrationManager::EtwInternalCallback( - [&etwRegistrationManager, this]( - LPCGUID SourceId, - ULONG IsEnabled, - UCHAR Level, - ULONGLONG MatchAnyKeyword, - ULONGLONG MatchAllKeyword, - PEVENT_FILTER_DESCRIPTOR FilterData, - PVOID CallbackContext) { - ORT_UNUSED_PARAMETER(SourceId); - ORT_UNUSED_PARAMETER(MatchAnyKeyword); - ORT_UNUSED_PARAMETER(MatchAllKeyword); - ORT_UNUSED_PARAMETER(FilterData); - ORT_UNUSED_PARAMETER(CallbackContext); - - if (IsEnabled == EVENT_CONTROL_CODE_ENABLE_PROVIDER) { - if ((MatchAnyKeyword & static_cast(onnxruntime::logging::ORTTraceLoggingKeyword::Logs)) != 0) { - auto ortETWSeverity = etwRegistrationManager.MapLevelToSeverity(); - (void)qnn_backend_manager_->ResetQnnLogLevel(ortETWSeverity); - } - if ((MatchAnyKeyword & static_cast(onnxruntime::logging::ORTTraceLoggingKeyword::Profiling)) != 0) { - if (Level != 0) { - // Commenting out Dynamic QNN Profiling for now - // There seems to be a crash in 3rd party QC QnnHtp.dll with this. 
- // Repro Scenario - start ETW tracing prior to session creation. - // Then disable/enable ETW Tracing with the code below uncommented a few times - // auto profiling_level_etw = GetProfilingLevelFromETWLevel(Level); - // (void)qnn_backend_manager_->SetProfilingLevelETW(profiling_level_etw); + qnn_backend_manager_ = qnn::QnnBackendManager::Create( + qnn::QnnBackendManagerConfig{backend_path, + profiling_level_etw, + profiling_level, + profiling_file_path, + context_priority, + qnn_saver_path, + device_id_, + htp_arch, + soc_model, + enable_htp_weight_sharing}); + +#if defined(_WIN32) + if (onnxruntime::logging::EtwRegistrationManager::SupportsETW()) { + auto& etwRegistrationManager = logging::EtwRegistrationManager::Instance(); + // Register callback for ETW capture state (rundown) + callback_ETWSink_provider_ = onnxruntime::logging::EtwRegistrationManager::EtwInternalCallback( + [&etwRegistrationManager, this]( + LPCGUID SourceId, + ULONG IsEnabled, + UCHAR Level, + ULONGLONG MatchAnyKeyword, + ULONGLONG MatchAllKeyword, + PEVENT_FILTER_DESCRIPTOR FilterData, + PVOID CallbackContext) { + ORT_UNUSED_PARAMETER(SourceId); + ORT_UNUSED_PARAMETER(MatchAnyKeyword); + ORT_UNUSED_PARAMETER(MatchAllKeyword); + ORT_UNUSED_PARAMETER(FilterData); + ORT_UNUSED_PARAMETER(CallbackContext); + + if (IsEnabled == EVENT_CONTROL_CODE_ENABLE_PROVIDER) { + if ((MatchAnyKeyword & static_cast(onnxruntime::logging::ORTTraceLoggingKeyword::Logs)) != 0) { + auto ortETWSeverity = etwRegistrationManager.MapLevelToSeverity(); + (void)qnn_backend_manager_->ResetQnnLogLevel(ortETWSeverity); + } + if ((MatchAnyKeyword & static_cast(onnxruntime::logging::ORTTraceLoggingKeyword::Profiling)) != 0) { + if (Level != 0) { + // Commenting out Dynamic QNN Profiling for now + // There seems to be a crash in 3rd party QC QnnHtp.dll with this. + // Repro Scenario - start ETW tracing prior to session creation. + // Then disable/enable ETW Tracing with the code below uncommented a few times + // auto profiling_level_etw = GetProfilingLevelFromETWLevel(Level); + // (void)qnn_backend_manager_->SetProfilingLevelETW(profiling_level_etw); + // + // NOTE(1/2/2025): It is possible that the above was not working in part because it is using the + // *logging ETW* subsystem to modify profiling, which should use an entirely different + // ETW provider (see QnnTelemetry). Should add callbacks for profiling to the QnnTelemetry ETW provider. 
+ } } } - } - if (IsEnabled == EVENT_CONTROL_CODE_DISABLE_PROVIDER) { - // (void)qnn_backend_manager_->SetProfilingLevelETW(qnn::ProfilingLevel::INVALID); - (void)qnn_backend_manager_->ResetQnnLogLevel(std::nullopt); - } - }); - etwRegistrationManager.RegisterInternalCallback(callback_ETWSink_provider_); + if (IsEnabled == EVENT_CONTROL_CODE_DISABLE_PROVIDER) { + // (void)qnn_backend_manager_->SetProfilingLevelETW(qnn::ProfilingLevel::INVALID); + (void)qnn_backend_manager_->ResetQnnLogLevel(std::nullopt); + } + }); + etwRegistrationManager.RegisterInternalCallback(callback_ETWSink_provider_); + } #endif } @@ -456,7 +439,7 @@ QNNExecutionProvider::~QNNExecutionProvider() { } // Unregister the ETW callback -#ifdef _WIN32 +#if defined(_WIN32) if (callback_ETWSink_provider_ != nullptr) { logging::EtwRegistrationManager::Instance().UnregisterInternalCallback(callback_ETWSink_provider_); } @@ -488,9 +471,10 @@ static void LogNodeSupport(const logging::Logger& logger, oss << "\tREASON : " << support_status.ErrorMessage() << std::endl; } - logging::Capture(logger, log_severity, logging::Category::onnxruntime, - log_data_type, call_site) - .Stream() + auto log_capture = Factory::Create(logger, log_severity, + logging::Category::onnxruntime, + log_data_type, call_site); + log_capture->Stream() << (support_status.IsOK() ? "Validation PASSED " : "Validation FAILED ") << "for " << num_nodes << " nodes in " << qnn_node_group.Type() << " (" << qnn_node_group.GetTargetNodeUnit()->OpType() << ") :" << std::endl @@ -594,11 +578,11 @@ static bool EpSharedContextsHasAllGraphs(const std::vectorName(); + const std::string& graph_name = ep_context_node.Name(); bool has_shared_qnn_model = SharedContext::GetInstance().HasQnnModel(graph_name); if (!has_shared_qnn_model) { LOGS(logger, VERBOSE) << "Graph: " << graph_name << " from EpContext node not found from shared EP contexts."; @@ -613,7 +597,7 @@ static bool EpSharedContextsHasAllGraphs(const std::vector>& result, - const utils::GenerateMetadefNameFn& gen_metadef_name, + const std::function& gen_metadef_name, const logging::Logger& logger) { std::unordered_set supported_nodes{}; std::vector> supported_groups{}; @@ -673,7 +657,7 @@ QNNExecutionProvider::GetCapability(const onnxruntime::GraphViewer& graph_viewer const auto gen_metadef_name = [&]() { uint64_t model_hash; - int metadef_id = metadef_id_generator_.GenerateId(graph_viewer, model_hash); + int metadef_id = metadef_id_generator_->GenerateId(graph_viewer, model_hash); return MakeString(QNN, context_node_name_prefix_, "_", model_hash, "_", metadef_id); }; @@ -697,6 +681,12 @@ QNNExecutionProvider::GetCapability(const onnxruntime::GraphViewer& graph_viewer return result; } + if (IsNpuBackend(qnn_backend_manager_->GetQnnBackendType())) { + // Set the power config id and the default power mode from provider option for main thread, + // otherwise it will mess up the power mode if user just create session without run it. 
+ GetPerThreadContext(); + } + // Report error if QNN CPU backend is loaded while CPU fallback is disabled if (disable_cpu_ep_fallback_ && qnn_backend_manager_->GetQnnBackendType() == qnn::QnnBackendType::CPU) { LOGS(logger, ERROR) << "Qnn CPU backend is loaded while CPU fallback is disabled."; @@ -718,7 +708,7 @@ QNNExecutionProvider::GetCapability(const onnxruntime::GraphViewer& graph_viewer std::vector> node_unit_holder; std::unordered_map node_unit_map; - std::tie(node_unit_holder, node_unit_map) = QDQ::GetAllNodeUnits(graph_viewer, logger); + std::tie(node_unit_holder, node_unit_map) = GetQDQNodeUnits(graph_viewer, logger); // remove is_qnn_ctx_model related code const auto supported_nodes = GetSupportedNodes(graph_viewer, node_unit_map, @@ -761,11 +751,14 @@ QNNExecutionProvider::GetCapability(const onnxruntime::GraphViewer& graph_viewer bool is_valid_partition = true; size_t nodes_in_partition = 0; - if (partition && partition->sub_graph) { - nodes_in_partition = partition->sub_graph->nodes.size(); + if (partition && ComputeCapability__SubGraph(*partition)) { + const auto& subgraph = ComputeCapability__SubGraph(*partition); + const auto& subgraph_nodes = IndexedSubGraph__Nodes(*subgraph); + + nodes_in_partition = subgraph_nodes.size(); if (nodes_in_partition == 1 && !is_qnn_ctx_model) { - const Node* node = graph_viewer.GetNode(partition->sub_graph->nodes[0]); + const Node* node = graph_viewer.GetNode(subgraph_nodes[0]); if (!node) { LOGS(logger, ERROR) << "QNN EP: Invalid node in partition of one node."; @@ -834,34 +827,34 @@ Status QNNExecutionProvider::CreateComputeFunc(std::vector& nod void QNNExecutionProvider::InitQnnGraphConfigs(qnn::QnnConfigsBuilder& configs_builder) const { if (qnn_backend_manager_->GetQnnBackendType() == qnn::QnnBackendType::HTP) { if (htp_graph_finalization_opt_mode_ != qnn::HtpGraphFinalizationOptimizationMode::kDefault) { - QnnHtpGraph_CustomConfig_t& htp_graph_opt_config = configs_builder.PushCustomConfig(); - htp_graph_opt_config.option = QNN_HTP_GRAPH_CONFIG_OPTION_OPTIMIZATION; - htp_graph_opt_config.optimizationOption.type = QNN_HTP_GRAPH_OPTIMIZATION_TYPE_FINALIZE_OPTIMIZATION_FLAG; - htp_graph_opt_config.optimizationOption.floatValue = static_cast(htp_graph_finalization_opt_mode_); - - QnnGraph_Config_t& graph_opt_config = configs_builder.PushConfig(); - graph_opt_config.option = QNN_GRAPH_CONFIG_OPTION_CUSTOM; - graph_opt_config.customConfig = &htp_graph_opt_config; + gsl::not_null htp_graph_opt_config = configs_builder.PushCustomConfig(); + htp_graph_opt_config->option = QNN_HTP_GRAPH_CONFIG_OPTION_OPTIMIZATION; + htp_graph_opt_config->optimizationOption.type = QNN_HTP_GRAPH_OPTIMIZATION_TYPE_FINALIZE_OPTIMIZATION_FLAG; + htp_graph_opt_config->optimizationOption.floatValue = static_cast(htp_graph_finalization_opt_mode_); + + gsl::not_null graph_opt_config = configs_builder.PushConfig(); + graph_opt_config->option = QNN_GRAPH_CONFIG_OPTION_CUSTOM; + graph_opt_config->customConfig = htp_graph_opt_config; } if (vtcm_size_in_mb_ > 0) { - QnnHtpGraph_CustomConfig_t& htp_graph_opt_config_vtcm = configs_builder.PushCustomConfig(); - htp_graph_opt_config_vtcm.option = QNN_HTP_GRAPH_CONFIG_OPTION_VTCM_SIZE; - htp_graph_opt_config_vtcm.vtcmSizeInMB = static_cast(vtcm_size_in_mb_); + gsl::not_null htp_graph_opt_config_vtcm = configs_builder.PushCustomConfig(); + htp_graph_opt_config_vtcm->option = QNN_HTP_GRAPH_CONFIG_OPTION_VTCM_SIZE; + htp_graph_opt_config_vtcm->vtcmSizeInMB = static_cast(vtcm_size_in_mb_); - QnnGraph_Config_t& graph_opt_config_vtcm = 
configs_builder.PushConfig(); - graph_opt_config_vtcm.option = QNN_GRAPH_CONFIG_OPTION_CUSTOM; - graph_opt_config_vtcm.customConfig = &htp_graph_opt_config_vtcm; + gsl::not_null graph_opt_config_vtcm = configs_builder.PushConfig(); + graph_opt_config_vtcm->option = QNN_GRAPH_CONFIG_OPTION_CUSTOM; + graph_opt_config_vtcm->customConfig = htp_graph_opt_config_vtcm; } if (enable_HTP_FP16_precision_) { - QnnHtpGraph_CustomConfig_t& htp_graph_precision_config = configs_builder.PushCustomConfig(); - htp_graph_precision_config.option = QNN_HTP_GRAPH_CONFIG_OPTION_PRECISION; - htp_graph_precision_config.precision = QNN_PRECISION_FLOAT16; + gsl::not_null htp_graph_precision_config = configs_builder.PushCustomConfig(); + htp_graph_precision_config->option = QNN_HTP_GRAPH_CONFIG_OPTION_PRECISION; + htp_graph_precision_config->precision = QNN_PRECISION_FLOAT16; - QnnGraph_Config_t& graph_precision_config = configs_builder.PushConfig(); - graph_precision_config.option = QNN_GRAPH_CONFIG_OPTION_CUSTOM; - graph_precision_config.customConfig = &htp_graph_precision_config; + gsl::not_null graph_precision_config = configs_builder.PushConfig(); + graph_precision_config->option = QNN_GRAPH_CONFIG_OPTION_CUSTOM; + graph_precision_config->customConfig = htp_graph_precision_config; } } } @@ -895,7 +888,6 @@ Status QNNExecutionProvider::CompileFromOrtGraph(const std::vector& fused_nodes_and_graphs, std::vector& node_compute_funcs) { const auto& logger = *GetLogger(); - bool is_qnn_ctx_model = qnn::IsFusedGraphHasCtxNode(fused_nodes_and_graphs); onnxruntime::PathString context_cache_path; @@ -918,10 +910,10 @@ Status QNNExecutionProvider::Compile(const std::vector& fused if (EpSharedContextsHasAllGraphs(fused_nodes_and_graphs, logger)) { for (auto fused_node_and_graph : fused_nodes_and_graphs) { const onnxruntime::GraphViewer& graph_viewer(fused_node_and_graph.filtered_graph); - const auto& ep_context_node = graph_viewer.Nodes().begin(); + const Node& ep_context_node = *graph_viewer.Nodes().begin(); const Node& fused_node = fused_node_and_graph.fused_node; const std::string& graph_meta_id = fused_node.Name(); - std::string key = ep_context_node->Name(); + std::string key = ep_context_node.Name(); auto qnn_model_shared = SharedContext::GetInstance().GetSharedQnnModel(key); ORT_RETURN_IF(nullptr == qnn_model_shared, "Graph: " + key + " not found from shared EP contexts."); ORT_RETURN_IF_ERROR(qnn_model_shared->SetGraphInputOutputInfo(graph_viewer, fused_node, logger)); @@ -963,10 +955,10 @@ Status QNNExecutionProvider::Compile(const std::vector& fused for (auto fused_node_and_graph : fused_nodes_and_graphs) { const onnxruntime::GraphViewer& graph_viewer(fused_node_and_graph.filtered_graph); - const auto& ep_context_node = graph_viewer.Nodes().begin(); + const Node& ep_context_node = *graph_viewer.Nodes().begin(); const Node& fused_node = fused_node_and_graph.fused_node; const std::string& graph_meta_id = fused_node.Name(); - std::string key = ep_context_node->Name(); + std::string key = ep_context_node.Name(); ORT_RETURN_IF(qnn_models.find(key) == qnn_models.end(), key + " key name not exist in table qnn_models."); auto qnn_model = std::move(qnn_models[key]); ORT_RETURN_IF_ERROR(qnn_model->SetGraphInputOutputInfo(graph_viewer, fused_node, logger)); @@ -1007,7 +999,7 @@ Status QNNExecutionProvider::Compile(const std::vector& fused buffer_size, max_spill_fill_buffer_size)); } - qnn_ep_context_model_ = std::make_unique("qnn_ep_context_model", false, logger); + qnn_ep_context_model_ = 
Factory::Create(std::string{"qnn_ep_context_model"}, false, logger); ORT_RETURN_IF_ERROR(qnn::CreateEPContextNodes(qnn_ep_context_model_.get(), context_buffer.get(), buffer_size, @@ -1026,8 +1018,8 @@ const InlinedVector QNNExecutionProvider::GetEpContextNodes() const InlinedVector ep_context_nodes; if (qnn_ep_context_model_) { const auto& graph = qnn_ep_context_model_->MainGraph(); - for (const auto& node : graph.Nodes()) { - ep_context_nodes.push_back(graph.GetNode(node.Index())); + for (gsl::not_null node : Graph__Nodes(graph)) { + ep_context_nodes.push_back(graph.GetNode(node->Index())); } } @@ -1118,22 +1110,34 @@ void QNNExecutionProvider::ReleasePerThreadContext() const { per_thread_context_cache->erase(cached_context_it); } +static bool TryGetConfigEntry(const ConfigOptions& config_options, const std::string& key, std::string& value) { + std::optional new_value = config_options.GetConfigEntry(key); + if (!new_value.has_value()) { + return false; + } + + value = *new_value; + return true; +} + Status QNNExecutionProvider::OnRunStart(const onnxruntime::RunOptions& run_options) { auto backend_type = qnn_backend_manager_->GetQnnBackendType(); if (qnn::QnnBackendType::HTP != backend_type && qnn::QnnBackendType::DSP != backend_type) { return Status::OK(); } + const ConfigOptions& config_options = RunOptions__GetConfigOptions(run_options); + std::string htp_perf_mode = ""; qnn::HtpPerformanceMode htp_performance_mode = qnn::HtpPerformanceMode::kHtpDefault; - if (run_options.config_options.TryGetConfigEntry(kOrtRunOptionsConfigQnnPerfMode, htp_perf_mode)) { + if (TryGetConfigEntry(config_options, kOrtRunOptionsConfigQnnPerfMode, htp_perf_mode)) { // set power mode ParseHtpPerformanceMode(htp_perf_mode, htp_performance_mode); } std::string rpc_latency = ""; uint32_t rpc_control_latency = 0; - if (run_options.config_options.TryGetConfigEntry(kOrtRunOptionsConfigQnnRpcControlLatency, rpc_latency)) { + if (TryGetConfigEntry(config_options, kOrtRunOptionsConfigQnnRpcControlLatency, rpc_latency)) { rpc_control_latency = static_cast(std::stoul(rpc_latency)); LOGS_DEFAULT(VERBOSE) << "rpc_control_latency: " << rpc_control_latency; } @@ -1159,9 +1163,11 @@ Status QNNExecutionProvider::OnRunEnd(bool /*sync_stream*/, const onnxruntime::R return Status::OK(); } + const ConfigOptions& config_options = RunOptions__GetConfigOptions(run_options); + std::string htp_perf_mode = ""; qnn::HtpPerformanceMode htp_performance_mode = qnn::HtpPerformanceMode::kHtpDefault; - if (run_options.config_options.TryGetConfigEntry(kOrtRunOptionsConfigQnnPerfModePostRun, htp_perf_mode)) { + if (TryGetConfigEntry(config_options, kOrtRunOptionsConfigQnnPerfModePostRun, htp_perf_mode)) { // set power mode ParseHtpPerformanceMode(htp_perf_mode, htp_performance_mode); } @@ -1176,4 +1182,25 @@ Status QNNExecutionProvider::OnRunEnd(bool /*sync_stream*/, const onnxruntime::R return Status::OK(); } + +std::vector QNNExecutionProvider::CreatePreferredAllocators() { + std::vector allocators{}; + + if (IsHtpSharedMemoryAllocatorAvailable()) { + LOGS_DEFAULT(INFO) << "Creating HtpSharedMemoryAllocator."; + + AllocatorFactory rpcmem_allocator_factory = [this](OrtDevice::DeviceId) { + return std::make_unique(rpcmem_library_); + }; + + AllocatorCreationInfo rpcmem_allocator_creation_info{rpcmem_allocator_factory, + /* device_id */ 0, + /* use_arena */ false}; + + allocators.emplace_back(CreateAllocator(rpcmem_allocator_creation_info)); + } + + return allocators; +} + } // namespace onnxruntime diff --git 
a/onnxruntime/core/providers/qnn/qnn_execution_provider.h b/onnxruntime/core/providers/qnn/qnn_execution_provider.h index a0577e8fd87f2..48f41c4da384f 100644 --- a/onnxruntime/core/providers/qnn/qnn_execution_provider.h +++ b/onnxruntime/core/providers/qnn/qnn_execution_provider.h @@ -3,91 +3,25 @@ #pragma once -#include "core/framework/execution_provider.h" -#include "core/framework/session_options.h" -#include "core/framework/model_metadef_id_generator.h" -#include "core/graph/model.h" +#include +#include #include +#include +#include + +#include "core/providers/qnn/ort_api.h" #include "core/providers/qnn/builder/qnn_backend_manager.h" #include "core/providers/qnn/builder/qnn_model.h" #include "core/providers/qnn/builder/qnn_configs_helper.h" +#include "core/providers/qnn/rpcmem_library.h" #include "HTP/QnnHtpGraph.h" -#include -#include -#include -#ifdef _WIN32 -#include "core/platform/windows/logging/etw_sink.h" -#endif namespace onnxruntime { -void RunOnUnload(std::function function); - -class SharedContext { - public: - static SharedContext& GetInstance() { - static SharedContext instance_; - return instance_; - } - - bool HasSharedQnnModels() { - const std::lock_guard lock(mtx_); - return !shared_qnn_models_.empty(); - } - - bool HasQnnModel(const std::string& model_name) { - auto it = find_if(shared_qnn_models_.begin(), shared_qnn_models_.end(), - [&model_name](const std::unique_ptr& qnn_model) { return qnn_model->Name() == model_name; }); - return it != shared_qnn_models_.end(); - } - - std::unique_ptr GetSharedQnnModel(const std::string& model_name) { - const std::lock_guard lock(mtx_); - auto it = find_if(shared_qnn_models_.begin(), shared_qnn_models_.end(), - [&model_name](const std::unique_ptr& qnn_model) { return qnn_model->Name() == model_name; }); - if (it == shared_qnn_models_.end()) { - return nullptr; - } - auto qnn_model = std::move(*it); - shared_qnn_models_.erase(it); - return qnn_model; - } - - bool SetSharedQnnModel(std::vector>&& shared_qnn_models, - std::string& duplicate_graph_names) { - const std::lock_guard lock(mtx_); - bool graph_exist = false; - for (auto& shared_qnn_model : shared_qnn_models) { - auto& model_name = shared_qnn_model->Name(); - auto it = find_if(shared_qnn_models_.begin(), shared_qnn_models_.end(), - [&model_name](const std::unique_ptr& qnn_model) { return qnn_model->Name() == model_name; }); - if (it == shared_qnn_models_.end()) { - shared_qnn_models_.push_back(std::move(shared_qnn_model)); - } else { - duplicate_graph_names.append(model_name + " "); - graph_exist = true; - } - } - - return graph_exist; - } - - private: - SharedContext() = default; - ~SharedContext() = default; - SharedContext(const SharedContext&) = delete; - SharedContext& operator=(const SharedContext&) = delete; - - std::vector> shared_qnn_models_; - // Producer sessions can be in parallel - // Consumer sessions have to be after producer sessions initialized - std::mutex mtx_; -}; - // Logical device representation. 
class QNNExecutionProvider : public IExecutionProvider {
  public:
-  explicit QNNExecutionProvider(const ProviderOptions& provider_options_map, const SessionOptions* session_options);
+  explicit QNNExecutionProvider(const ProviderOptions& provider_options_map, const ConfigOptions* config_options);
   virtual ~QNNExecutionProvider();
   ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(QNNExecutionProvider);
@@ -113,6 +47,8 @@ class QNNExecutionProvider : public IExecutionProvider {

   Status OnRunEnd(bool sync_stream, const onnxruntime::RunOptions& run_options) override;

+  std::vector<AllocatorPtr> CreatePreferredAllocators() override;
+
  private:
   std::unordered_set<const Node*> GetSupportedNodes(const GraphViewer& graph_viewer,
                                                     const std::unordered_map<const Node*, const NodeUnit*>& node_unit_map,
@@ -132,9 +68,13 @@ class QNNExecutionProvider : public IExecutionProvider {

   qnn::ProfilingLevel GetProfilingLevelFromETWLevel(unsigned char level);

+  bool IsHtpSharedMemoryAllocatorAvailable() const { return rpcmem_library_ != nullptr; }
+
  private:
   qnn::HtpGraphFinalizationOptimizationMode htp_graph_finalization_opt_mode_ = qnn::HtpGraphFinalizationOptimizationMode::kDefault;
-  std::unique_ptr<qnn::QnnBackendManager> qnn_backend_manager_;
+  // Note: Using shared_ptr so that we can refer to it with a weak_ptr from a
+  // HtpSharedMemoryAllocator allocation cleanup callback.
+  std::shared_ptr<qnn::QnnBackendManager> qnn_backend_manager_;
   std::unordered_map<std::string, std::unique_ptr<qnn::QnnModel>> qnn_models_;
   bool context_cache_enabled_ = false;
   std::string context_cache_path_cfg_ = "";
@@ -143,18 +83,22 @@ class QNNExecutionProvider : public IExecutionProvider {
   bool qnn_context_embed_mode_ = true;
   int32_t vtcm_size_in_mb_ = 0;
   std::unique_ptr<Model> qnn_ep_context_model_;
-  ModelMetadefIdGenerator metadef_id_generator_;
+  std::unique_ptr<ModelMetadefIdGenerator> metadef_id_generator_;
   uint32_t device_id_ = 0;
   qnn::HtpPerformanceMode default_htp_performance_mode_ = qnn::HtpPerformanceMode::kHtpDefault;
   uint32_t default_rpc_control_latency_ = 0;
   bool enable_HTP_FP16_precision_ = true;
   bool share_ep_contexts_ = false;
   bool enable_spill_fill_buffer_ = false;
-#ifdef _WIN32
+#if defined(_WIN32)
   onnxruntime::logging::EtwRegistrationManager::EtwInternalCallback callback_ETWSink_provider_ = nullptr;
 #endif
   qnn::ModelSettings model_settings_ = {};
+
+  // Whether this is set depends on a session option enabling it and if the RPCMEM dynamic library is available.
+  // This is potentially shared with HtpSharedMemoryAllocator which may be returned by CreatePreferredAllocators().
+  std::shared_ptr<qnn::RpcMemLibrary> rpcmem_library_ = nullptr;
+
   class PerThreadContext final {
    public:
     PerThreadContext(qnn::QnnBackendManager* qnn_backend_manager,
diff --git a/onnxruntime/core/providers/qnn/qnn_provider_factory.cc b/onnxruntime/core/providers/qnn/qnn_provider_factory.cc
index 4095d7ff02a33..d4dd446751359 100644
--- a/onnxruntime/core/providers/qnn/qnn_provider_factory.cc
+++ b/onnxruntime/core/providers/qnn/qnn_provider_factory.cc
@@ -2,32 +2,68 @@
 // Licensed under the MIT License

 #include "core/providers/qnn/qnn_provider_factory_creator.h"
-
-#include "core/session/abi_session_options_impl.h"
 #include "core/providers/qnn/qnn_execution_provider.h"
-#include "core/session/ort_apis.h"

 namespace onnxruntime {
 struct QNNProviderFactory : IExecutionProviderFactory {
-  QNNProviderFactory(const ProviderOptions& provider_options_map, const SessionOptions* session_options)
-      : provider_options_map_(provider_options_map), session_options_(session_options) {
+  QNNProviderFactory(const ProviderOptions& provider_options_map, const ConfigOptions* config_options)
+      : provider_options_map_(provider_options_map), config_options_(config_options) {
   }

   ~QNNProviderFactory() override {
   }

   std::unique_ptr<IExecutionProvider> CreateProvider() override {
-    return std::make_unique<QNNExecutionProvider>(provider_options_map_, session_options_);
+    return std::make_unique<QNNExecutionProvider>(provider_options_map_, config_options_);
   }

  private:
   ProviderOptions provider_options_map_;
-  const SessionOptions* session_options_;
+  const ConfigOptions* config_options_;
 };

+#if BUILD_QNN_EP_STATIC_LIB
 std::shared_ptr<IExecutionProviderFactory> QNNProviderFactoryCreator::Create(const ProviderOptions& provider_options_map,
                                                                              const SessionOptions* session_options) {
-  return std::make_shared<QNNProviderFactory>(provider_options_map, session_options);
+  const ConfigOptions* config_options = nullptr;
+  if (session_options != nullptr) {
+    config_options = &session_options->config_options;
+  }
+
+  return std::make_shared<QNNProviderFactory>(provider_options_map, config_options);
 }
+#else
+struct QNN_Provider : Provider {
+  std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory(const void* param) override {
+    if (param == nullptr) {
+      LOGS_DEFAULT(ERROR) << "[QNN EP] Passed NULL options to CreateExecutionProviderFactory()";
+      return nullptr;
+    }
+
+    std::array<const void*, 2> pointers_array = *reinterpret_cast<const std::array<const void*, 2>*>(param);
+    const ProviderOptions* provider_options = reinterpret_cast<const ProviderOptions*>(pointers_array[0]);
+    const ConfigOptions* config_options = reinterpret_cast<const ConfigOptions*>(pointers_array[1]);
+
+    if (provider_options == nullptr) {
+      LOGS_DEFAULT(ERROR) << "[QNN EP] Passed NULL ProviderOptions to CreateExecutionProviderFactory()";
+      return nullptr;
+    }
+
+    return std::make_shared<QNNProviderFactory>(*provider_options, config_options);
+  }
+
+  void Initialize() override {}
+  void Shutdown() override {}
+} g_provider;
+#endif  // BUILD_QNN_EP_STATIC_LIB
 }  // namespace onnxruntime

+#if !BUILD_QNN_EP_STATIC_LIB
+extern "C" {
+
+ORT_API(onnxruntime::Provider*, GetProvider) {
+  return &onnxruntime::g_provider;
+}
+}
+#endif  // !BUILD_QNN_EP_STATIC_LIB
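The two-pointer protocol above is the entire cross-DLL contract for passing options into the dynamically loaded QNN EP. A hypothetical caller-side sketch (the real packing lives in core/session/provider_bridge_ort.cc; this helper name is illustrative): param points at a std::array of two pointers, [0] = ProviderOptions*, [1] = ConfigOptions* (which may be null).

#include <array>
#include <memory>

// Illustrative only: pack the options the same way the provider bridge does
// before calling into the QNN EP shared library.
std::shared_ptr<IExecutionProviderFactory> CreateQnnFactoryViaBridge(
    Provider& qnn_provider,  // obtained from the DLL's exported GetProvider()
    const ProviderOptions& provider_options,
    const ConfigOptions* config_options) {
  std::array<const void*, 2> pointers_array = {&provider_options, config_options};
  // The opaque param is unpacked by QNN_Provider::CreateExecutionProviderFactory() above.
  return qnn_provider.CreateExecutionProviderFactory(&pointers_array);
}

diff --git a/onnxruntime/core/providers/qnn/qnn_provider_factory_creator.h b/onnxruntime/core/providers/qnn/qnn_provider_factory_creator.h
index 80f9d99b804e7..46b6c15b40553 100644
--- a/onnxruntime/core/providers/qnn/qnn_provider_factory_creator.h
+++ b/onnxruntime/core/providers/qnn/qnn_provider_factory_creator.h
@@ -11,6 +11,9 @@
 namespace onnxruntime {
 struct SessionOptions;

+// Defined in core/session/provider_bridge_ort.cc if built as a shared library (default build config).
+// Defined in core/providers/qnn/qnn_provider_factory.cc if built as a static library.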
+// The preprocessor macro `BUILD_QNN_EP_STATIC_LIB` is defined and set to 1 if QNN is built as a static library. struct QNNProviderFactoryCreator { static std::shared_ptr Create(const ProviderOptions& provider_options_map, const SessionOptions* session_options); diff --git a/onnxruntime/core/providers/qnn/qnn_telemetry.cc b/onnxruntime/core/providers/qnn/qnn_telemetry.cc new file mode 100644 index 0000000000000..b2c8350bfe8ca --- /dev/null +++ b/onnxruntime/core/providers/qnn/qnn_telemetry.cc @@ -0,0 +1,211 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#include "core/providers/qnn/qnn_telemetry.h" + +#ifdef _WIN32 +#if !BUILD_QNN_EP_STATIC_LIB +// ETW includes +// need space after Windows.h to prevent clang-format re-ordering breaking the build. +// TraceLoggingProvider.h must follow Windows.h +#include + +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable : 26440) // Warning C26440 from TRACELOGGING_DEFINE_PROVIDER +#endif + +#include +#include +#include +#include "core/platform/windows/TraceLoggingConfig.h" + +// Seems this workaround can be dropped when we drop support for VS2017 toolchains +// https://developercommunity.visualstudio.com/content/problem/85934/traceloggingproviderh-is-incompatible-with-utf-8.html +#ifdef _TlgPragmaUtf8Begin +#undef _TlgPragmaUtf8Begin +#define _TlgPragmaUtf8Begin +#endif + +#ifdef _TlgPragmaUtf8End +#undef _TlgPragmaUtf8End +#define _TlgPragmaUtf8End +#endif + +// Different versions of TraceLoggingProvider.h contain different macro variable names for the utf8 begin and end, +// and we need to cover the lower case version as well. +#ifdef _tlgPragmaUtf8Begin +#undef _tlgPragmaUtf8Begin +#define _tlgPragmaUtf8Begin +#endif + +#ifdef _tlgPragmaUtf8End +#undef _tlgPragmaUtf8End +#define _tlgPragmaUtf8End +#endif + +TRACELOGGING_DEFINE_PROVIDER(telemetry_provider_handle, "Microsoft.ML.ONNXRuntime", + // {3a26b1ff-7484-7484-7484-15261f42614d} + (0x3a26b1ff, 0x7484, 0x7484, 0x74, 0x84, 0x15, 0x26, 0x1f, 0x42, 0x61, 0x4d), + TraceLoggingOptionMicrosoftTelemetry()); + +#ifdef _MSC_VER +#pragma warning(pop) +#endif +#endif // !BUILD_QNN_EP_STATIC_LIB + +#include "core/providers/qnn/ort_api.h" + +namespace onnxruntime { +namespace qnn { + +#if !BUILD_QNN_EP_STATIC_LIB +std::mutex QnnTelemetry::mutex_; +std::mutex QnnTelemetry::provider_change_mutex_; +uint32_t QnnTelemetry::global_register_count_ = 0; +bool QnnTelemetry::enabled_ = true; +UCHAR QnnTelemetry::level_ = 0; +UINT64 QnnTelemetry::keyword_ = 0; +std::vector QnnTelemetry::callbacks_; +std::mutex QnnTelemetry::callbacks_mutex_; +#endif // !BUILD_QNN_EP_STATIC_LIB + +QnnTelemetry::QnnTelemetry() { +#if !BUILD_QNN_EP_STATIC_LIB + std::lock_guard lock(mutex_); + if (global_register_count_ == 0) { + // TraceLoggingRegister is fancy in that you can only register once GLOBALLY for the whole process + HRESULT hr = TraceLoggingRegisterEx(telemetry_provider_handle, ORT_TL_EtwEnableCallback, nullptr); + if (SUCCEEDED(hr)) { + global_register_count_ += 1; + } + } +#endif // !BUILD_QNN_EP_STATIC_LIB +} + +QnnTelemetry::~QnnTelemetry() { +#if !BUILD_QNN_EP_STATIC_LIB + std::lock_guard lock(mutex_); + if (global_register_count_ > 0) { + global_register_count_ -= 1; + if (global_register_count_ == 0) { + TraceLoggingUnregister(telemetry_provider_handle); + } + } + + std::lock_guard lock_callbacks(callbacks_mutex_); + callbacks_.clear(); +#endif // !BUILD_QNN_EP_STATIC_LIB +} + +QnnTelemetry& QnnTelemetry::Instance() { + static QnnTelemetry instance; 
+ return instance; +} + +bool QnnTelemetry::IsEnabled() const { +#if BUILD_QNN_EP_STATIC_LIB + const Env& env = GetDefaultEnv(); + auto& provider = env.GetTelemetryProvider(); + return provider.IsEnabled(); +#else + std::lock_guard lock(provider_change_mutex_); + return enabled_; +#endif +} + +UCHAR QnnTelemetry::Level() const { +#if BUILD_QNN_EP_STATIC_LIB + const Env& env = GetDefaultEnv(); + auto& provider = env.GetTelemetryProvider(); + return provider.Level(); +#else + std::lock_guard lock(provider_change_mutex_); + return level_; +#endif +} + +UINT64 QnnTelemetry::Keyword() const { +#if BUILD_QNN_EP_STATIC_LIB + const Env& env = GetDefaultEnv(); + auto& provider = env.GetTelemetryProvider(); + return provider.Keyword(); +#else + std::lock_guard lock(provider_change_mutex_); + return keyword_; +#endif +} + +void QnnTelemetry::LogQnnProfileEvent(uint64_t timestamp, + const std::string& message, + const std::string& qnnScalarValue, + const std::string& unit, + const std::string& timingSource, + const std::string& eventLevel, + const char* eventIdentifier) const { + TraceLoggingWrite( + telemetry_provider_handle, + "QNNProfilingEvent", + TraceLoggingKeyword(static_cast(onnxruntime::logging::ORTTraceLoggingKeyword::Profiling)), + TraceLoggingLevel(WINEVENT_LEVEL_VERBOSE), + TraceLoggingValue(timestamp, "Timestamp"), + TraceLoggingString(message.c_str(), "Message"), + TraceLoggingString(qnnScalarValue.c_str(), "Value"), + TraceLoggingString(unit.c_str(), "Unit of Measurement"), + TraceLoggingString(timingSource.c_str(), "Timing Source"), + TraceLoggingString(eventLevel.c_str(), "Event Level"), + TraceLoggingString(eventIdentifier, "Event Identifier")); +} + +void QnnTelemetry::RegisterInternalCallback(const EtwInternalCallback& callback) { +#if BUILD_QNN_EP_STATIC_LIB + WindowsTelemetry::RegisterInternalCallback(callback); +#else + std::lock_guard lock_callbacks(callbacks_mutex_); + callbacks_.push_back(&callback); +#endif +} + +void QnnTelemetry::UnregisterInternalCallback(const EtwInternalCallback& callback) { +#if BUILD_QNN_EP_STATIC_LIB + WindowsTelemetry::UnregisterInternalCallback(callback); +#else + std::lock_guard lock_callbacks(callbacks_mutex_); + auto new_end = std::remove_if(callbacks_.begin(), callbacks_.end(), + [&callback](const EtwInternalCallback* ptr) { + return ptr == &callback; + }); + callbacks_.erase(new_end, callbacks_.end()); +#endif +} + +#if !BUILD_QNN_EP_STATIC_LIB +void NTAPI QnnTelemetry::ORT_TL_EtwEnableCallback( + _In_ LPCGUID SourceId, + _In_ ULONG IsEnabled, + _In_ UCHAR Level, + _In_ ULONGLONG MatchAnyKeyword, + _In_ ULONGLONG MatchAllKeyword, + _In_opt_ PEVENT_FILTER_DESCRIPTOR FilterData, + _In_opt_ PVOID CallbackContext) { + std::lock_guard lock(provider_change_mutex_); + enabled_ = (IsEnabled != 0); + level_ = Level; + keyword_ = MatchAnyKeyword; + + InvokeCallbacks(SourceId, IsEnabled, Level, MatchAnyKeyword, MatchAllKeyword, FilterData, CallbackContext); +} + +void QnnTelemetry::InvokeCallbacks(LPCGUID SourceId, ULONG IsEnabled, UCHAR Level, ULONGLONG MatchAnyKeyword, + ULONGLONG MatchAllKeyword, PEVENT_FILTER_DESCRIPTOR FilterData, + PVOID CallbackContext) { + std::lock_guard lock_callbacks(callbacks_mutex_); + for (const auto& callback : callbacks_) { + (*callback)(SourceId, IsEnabled, Level, MatchAnyKeyword, MatchAllKeyword, FilterData, CallbackContext); + } +} +#endif // !BUILD_QNN_EP_STATIC_LIB + +} // namespace qnn +} // namespace onnxruntime +#endif // defined(_WIN32) diff --git a/onnxruntime/core/providers/qnn/qnn_telemetry.h 
b/onnxruntime/core/providers/qnn/qnn_telemetry.h new file mode 100644 index 0000000000000..a2d42c518c1ac --- /dev/null +++ b/onnxruntime/core/providers/qnn/qnn_telemetry.h @@ -0,0 +1,98 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#pragma once +#ifdef _WIN32 +#include + +#if !BUILD_QNN_EP_STATIC_LIB +#include +#endif + +#include +#include +#include +#include + +#include "core/providers/qnn/ort_api.h" + +#if !BUILD_QNN_EP_STATIC_LIB +TRACELOGGING_DECLARE_PROVIDER(telemetry_provider_handle); +#endif + +namespace onnxruntime { +namespace qnn { + +/// +/// Singleton class used to log QNN profiling events to the ONNX Runtime telemetry tracelogging provider. +/// +/// When QNN EP is a DLL, we must define our own tracelogging provider handle via TRACELOGGING_DEFINE_PROVIDER. +/// TraceLogging documentation states that separate DLLs cannot share the same tracelogging provider handle. See: +/// https://learn.microsoft.com/en-us/windows/win32/api/traceloggingprovider/nf-traceloggingprovider-tracelogging_define_provider#remarks +/// +/// When QNN EP is a static library, we use the tracelogging provider handle already defined +/// in core/platform/windows/telemetry.h/.cc. In this case, we forward method calls to the +/// ORT Env's telemetry provider. +/// +class QnnTelemetry { + public: + static QnnTelemetry& Instance(); + bool IsEnabled() const; + + // Get the current logging level + unsigned char Level() const; + + // Get the current keyword + UINT64 Keyword() const; + + // Logs QNN profiling event as trace logging event. + void LogQnnProfileEvent(uint64_t timestamp, + const std::string& message, + const std::string& qnnScalarValue, + const std::string& unit, + const std::string& timingSource, + const std::string& eventLevel, + const char* eventIdentifier) const; + + using EtwInternalCallback = std::function; + + static void RegisterInternalCallback(const EtwInternalCallback& callback); + + static void UnregisterInternalCallback(const EtwInternalCallback& callback); + + private: + QnnTelemetry(); + ~QnnTelemetry(); + ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(QnnTelemetry); + +#if !BUILD_QNN_EP_STATIC_LIB + static std::mutex mutex_; + static uint32_t global_register_count_; + static bool enabled_; + + static std::vector callbacks_; + static std::mutex callbacks_mutex_; + static std::mutex provider_change_mutex_; + static UCHAR level_; + static ULONGLONG keyword_; + + static void InvokeCallbacks(LPCGUID SourceId, ULONG IsEnabled, UCHAR Level, ULONGLONG MatchAnyKeyword, + ULONGLONG MatchAllKeyword, PEVENT_FILTER_DESCRIPTOR FilterData, PVOID CallbackContext); + + static void NTAPI ORT_TL_EtwEnableCallback( + _In_ LPCGUID SourceId, + _In_ ULONG IsEnabled, + _In_ UCHAR Level, + _In_ ULONGLONG MatchAnyKeyword, + _In_ ULONGLONG MatchAllKeyword, + _In_opt_ PEVENT_FILTER_DESCRIPTOR FilterData, + _In_opt_ PVOID CallbackContext); +#endif +}; + +} // namespace qnn +} // namespace onnxruntime + +#endif // defined(_WIN32) diff --git a/onnxruntime/core/providers/qnn/rpcmem_library.cc b/onnxruntime/core/providers/qnn/rpcmem_library.cc new file mode 100644 index 0000000000000..93c5ed54ab371 --- /dev/null +++ b/onnxruntime/core/providers/qnn/rpcmem_library.cc @@ -0,0 +1,65 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. 
+// Licensed under the MIT License
+
+#include "core/providers/qnn/rpcmem_library.h"
+#include "core/providers/qnn/ort_api.h"
+
+namespace onnxruntime::qnn {
+
+namespace {
+
+const PathChar* GetRpcMemSharedLibraryPath() {
+#if defined(_WIN32)
+  return ORT_TSTR("libcdsprpc.dll");
+#else
+  return ORT_TSTR("libcdsprpc.so");
+#endif
+}
+
+DynamicLibraryHandle LoadDynamicLibrary(const PathString& path, bool global_symbols) {
+  // Custom deleter to unload the shared library. Avoid throwing from it because it may run in dtor.
+  const auto unload_library = [](void* library_handle) {
+    if (library_handle == nullptr) {
+      return;
+    }
+
+    const auto& env = GetDefaultEnv();
+    const auto unload_status = env.UnloadDynamicLibrary(library_handle);
+
+    if (!unload_status.IsOK()) {
+      LOGS_DEFAULT(WARNING) << "Failed to unload shared library. Error: " << unload_status.ErrorMessage();
+    }
+  };
+
+  const auto& env = GetDefaultEnv();
+  void* library_handle = nullptr;
+
+  const auto load_status = env.LoadDynamicLibrary(path, global_symbols, &library_handle);
+  if (!load_status.IsOK()) {
+    ORT_THROW("Failed to load ", ToUTF8String(path), ": ", load_status.ErrorMessage());
+  }
+
+  return DynamicLibraryHandle{library_handle, unload_library};
+}
+
+RpcMemApi CreateApi(void* library_handle) {
+  RpcMemApi api{};
+
+  const auto& env = GetDefaultEnv();
+  ORT_THROW_IF_ERROR(env.GetSymbolFromLibrary(library_handle, "rpcmem_alloc", (void**)&api.alloc));
+
+  ORT_THROW_IF_ERROR(env.GetSymbolFromLibrary(library_handle, "rpcmem_free", (void**)&api.free));
+
+  ORT_THROW_IF_ERROR(env.GetSymbolFromLibrary(library_handle, "rpcmem_to_fd", (void**)&api.to_fd));
+
+  return api;
+}
+
+}  // namespace
+
+RpcMemLibrary::RpcMemLibrary()
+    : library_handle_(LoadDynamicLibrary(GetRpcMemSharedLibraryPath(), /* global_symbols */ false)),
+      api_{CreateApi(library_handle_.get())} {
+}
+
+}  // namespace onnxruntime::qnn
diff --git a/onnxruntime/core/providers/qnn/rpcmem_library.h b/onnxruntime/core/providers/qnn/rpcmem_library.h
new file mode 100644
index 0000000000000..0642c96798188
--- /dev/null
+++ b/onnxruntime/core/providers/qnn/rpcmem_library.h
@@ -0,0 +1,68 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License
+
+#pragma once
+
+#include <cstdint>
+#include <memory>
+
+#include "core/providers/qnn/ort_api.h"
+
+namespace onnxruntime::qnn {
+
+using DynamicLibraryHandle = std::unique_ptr<void, void (*)(void*)>;
+
+// This namespace contains constants and typedefs corresponding to functions from rpcmem.h.
+// https://github.com/quic/fastrpc/blob/v0.1.1/inc/rpcmem.h
+namespace rpcmem {
+
+constexpr uint32_t RPCMEM_DEFAULT_FLAGS = 1;
+
+constexpr int RPCMEM_HEAP_ID_SYSTEM = 25;
+
+/**
+ * Allocate a zero-copy buffer for size up to 2 GB with the FastRPC framework.
+ * Buffers larger than 2 GB must be allocated with rpcmem_alloc2.
+ * @param[in] heapid Heap ID to use for memory allocation.
+ * @param[in] flags ION flags to use for memory allocation.
+ * @param[in] size Buffer size to allocate.
+ * @return Pointer to the buffer on success; NULL on failure.
+ */
+using AllocFnPtr = void* (*)(int heapid, uint32_t flags, int size);
+
+/**
+ * Free a buffer and ignore invalid buffers.
+ */
+using FreeFnPtr = void (*)(void* po);
+
+/**
+ * Return an associated file descriptor.
+ * @param[in] po Data pointer for an RPCMEM-allocated buffer.
+ * @return Buffer file descriptor.
+ */
+using ToFdFnPtr = int (*)(void* po);
+
+}  // namespace rpcmem
+
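Purely as an illustration of how these three entry points compose (using the RpcMemApi table declared just below; this helper is hypothetical, not part of the change):

// Hypothetical round trip: allocate a shareable buffer, obtain the file
// descriptor that identifies its ION/DMA-BUF backing, then free it.
void RpcMemRoundTrip(const RpcMemApi& api) {
  constexpr int kSize = 4096;
  void* buffer = api.alloc(rpcmem::RPCMEM_HEAP_ID_SYSTEM, rpcmem::RPCMEM_DEFAULT_FLAGS, kSize);
  if (buffer == nullptr) {
    return;  // allocation failed
  }

  const int fd = api.to_fd(buffer);
  (void)fd;  // e.g., handed to the HTP driver to register a shared memory handle

  api.free(buffer);  // rpcmem_free ignores invalid buffers, so an unconditional free is safe
}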
+// RPCMEM API function pointers.
+struct RpcMemApi {
+  rpcmem::AllocFnPtr alloc;
+  rpcmem::FreeFnPtr free;
+  rpcmem::ToFdFnPtr to_fd;
+};
+
+// Loads and provides access to the RPCMEM API functions from a dynamically loaded library.
+class RpcMemLibrary {
+ public:
+  RpcMemLibrary();
+
+  ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(RpcMemLibrary);
+
+  const RpcMemApi& Api() const { return api_; }
+
+ private:
+  DynamicLibraryHandle library_handle_;
+  RpcMemApi api_;
+};
+
+}  // namespace onnxruntime::qnn
diff --git a/onnxruntime/core/providers/qnn/shared_context.h b/onnxruntime/core/providers/qnn/shared_context.h
new file mode 100644
index 0000000000000..81de357dbe677
--- /dev/null
+++ b/onnxruntime/core/providers/qnn/shared_context.h
@@ -0,0 +1,76 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License
+
+#include <memory>
+#include <mutex>
+#include <vector>
+
+#include "core/providers/qnn/ort_api.h"
+#include "core/providers/qnn/builder/qnn_model.h"
+
+#pragma once
+
+namespace onnxruntime {
+
+class SharedContext {
+ public:
+  static SharedContext& GetInstance() {
+    static SharedContext instance_;
+    return instance_;
+  }
+
+  bool HasSharedQnnModels() {
+    const std::lock_guard<std::mutex> lock(mtx_);
+    return !shared_qnn_models_.empty();
+  }
+
+  bool HasQnnModel(const std::string& model_name) {
+    auto it = find_if(shared_qnn_models_.begin(), shared_qnn_models_.end(),
+                      [&model_name](const std::unique_ptr<qnn::QnnModel>& qnn_model) { return qnn_model->Name() == model_name; });
+    return it != shared_qnn_models_.end();
+  }
+
+  std::unique_ptr<qnn::QnnModel> GetSharedQnnModel(const std::string& model_name) {
+    const std::lock_guard<std::mutex> lock(mtx_);
+    auto it = find_if(shared_qnn_models_.begin(), shared_qnn_models_.end(),
+                      [&model_name](const std::unique_ptr<qnn::QnnModel>& qnn_model) { return qnn_model->Name() == model_name; });
+    if (it == shared_qnn_models_.end()) {
+      return nullptr;
+    }
+    auto qnn_model = std::move(*it);
+    shared_qnn_models_.erase(it);
+    return qnn_model;
+  }
+
+  bool SetSharedQnnModel(std::vector<std::unique_ptr<qnn::QnnModel>>&& shared_qnn_models,
+                         std::string& duplicate_graph_names) {
+    const std::lock_guard<std::mutex> lock(mtx_);
+    bool graph_exist = false;
+    for (auto& shared_qnn_model : shared_qnn_models) {
+      auto& model_name = shared_qnn_model->Name();
+      auto it = find_if(shared_qnn_models_.begin(), shared_qnn_models_.end(),
+                        [&model_name](const std::unique_ptr<qnn::QnnModel>& qnn_model) { return qnn_model->Name() == model_name; });
+      if (it == shared_qnn_models_.end()) {
+        shared_qnn_models_.push_back(std::move(shared_qnn_model));
+      } else {
+        duplicate_graph_names.append(model_name + " ");
+        graph_exist = true;
+      }
+    }
+
+    return graph_exist;
+  }
+
+ private:
+  SharedContext() = default;
+  ~SharedContext() = default;
+
+  ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(SharedContext);
+
+  std::vector<std::unique_ptr<qnn::QnnModel>> shared_qnn_models_;
+  // Producer sessions can be in parallel
+  // Consumer sessions have to be after producer sessions initialized
+  std::mutex mtx_;
+};
+
+}  // namespace onnxruntime
diff --git a/onnxruntime/core/providers/qnn/symbols.def b/onnxruntime/core/providers/qnn/symbols.def
new file mode 100644
index 0000000000000..4ec2f7914c208
--- /dev/null
+++ b/onnxruntime/core/providers/qnn/symbols.def
@@ -0,0 +1,2 @@
+EXPORTS
+  GetProvider
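A brief, hypothetical sketch of the producer/consumer flow the comments in SharedContext describe: a producer session deposits compiled QNN models under their graph names, and a later consumer session claims one by name (in practice these calls are made by QNN EP sessions, not user code, and QnnModel construction is elided here):

#include <memory>
#include <string>
#include <vector>

// Producer side: share compiled models; duplicates are reported, not replaced.
void ShareModels(std::vector<std::unique_ptr<qnn::QnnModel>> compiled_models) {
  std::string duplicate_graph_names;
  const bool had_duplicates =
      SharedContext::GetInstance().SetSharedQnnModel(std::move(compiled_models), duplicate_graph_names);
  if (had_duplicates) {
    LOGS_DEFAULT(WARNING) << "Graphs already shared: " << duplicate_graph_names;
  }
}

// Consumer side: ownership transfers out of the shared cache on success;
// returns nullptr if no producer session has shared this graph yet.
std::unique_ptr<qnn::QnnModel> ClaimModel(const std::string& graph_name) {
  return SharedContext::GetInstance().GetSharedQnnModel(graph_name);
}

diff --git a/onnxruntime/core/providers/qnn/version_script.lds b/onnxruntime/core/providers/qnn/version_script.lds
new file mode 100644
index 0000000000000..094abb3329781
--- /dev/null
+++ b/onnxruntime/core/providers/qnn/version_script.lds
@@ -0,0 +1,9 @@
+#_init and _fini should be local
+VERS_1.0 {
+  global:
+    GetProvider;
+
+  # Hide everything else.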
+  local:
+    *;
+};
diff --git a/onnxruntime/core/providers/rknpu/node_attr_helper.h b/onnxruntime/core/providers/rknpu/node_attr_helper.h
index 6ab8f8c6bb953..76a0c721f70aa 100644
--- a/onnxruntime/core/providers/rknpu/node_attr_helper.h
+++ b/onnxruntime/core/providers/rknpu/node_attr_helper.h
@@ -4,7 +4,7 @@

 #pragma once

-#include
+#include "core/graph/onnx_protobuf.h"
 #include
 #include

diff --git a/onnxruntime/core/providers/rknpu/onnx_converter.h b/onnxruntime/core/providers/rknpu/onnx_converter.h
index e90efd75b9c7f..10cc09a9dba92 100644
--- a/onnxruntime/core/providers/rknpu/onnx_converter.h
+++ b/onnxruntime/core/providers/rknpu/onnx_converter.h
@@ -2,7 +2,7 @@

 #pragma once

-#include
+#include "core/graph/onnx_protobuf.h"
 #include
 #include

diff --git a/onnxruntime/core/providers/rocm/fpgeneric.cu b/onnxruntime/core/providers/rocm/fpgeneric.cu
index 18edb359f6062..97570721b0d62 100644
--- a/onnxruntime/core/providers/rocm/fpgeneric.cu
+++ b/onnxruntime/core/providers/rocm/fpgeneric.cu
@@ -53,8 +53,27 @@ __global__ void CopyVectorBFloat16(const onnxruntime::BFloat16* x, int incx, onn

 }  // namespace

+dim3 hipblasTransposeHelperDimGrid(int m, int n) {
+  return dim3((n + TRANS_TILE_DIM - 1) / TRANS_TILE_DIM, (m + TRANS_TILE_DIM - 1) / TRANS_TILE_DIM, 1);
+}
+
+// hipblasTransposeHelper can only be used if it won't overflow the maxGridSize y dimension size
+__host__ bool CanUse_hipblasTransposeHelper_MLFloat16(int m, int n) {
+  dim3 dimGrid = hipblasTransposeHelperDimGrid(m, n);
+
+  int deviceId;
+  hipError_t hipError = hipGetDevice(&deviceId);
+  if (hipError != 0) return false;
+
+  hipDeviceProp_t deviceProp;
+  hipError = hipGetDeviceProperties(&deviceProp, deviceId);
+  if (hipError != 0) return false;
+
+  return dimGrid.y < deviceProp.maxGridSize[1];
+}
+
 hipblasStatus_t hipblasTransposeHelper(hipStream_t stream, hipblasHandle_t, hipblasOperation_t, hipblasOperation_t, int m, int n, const half*, const half* A, int, const half*, const half*, int, half* C, int) {
-  if (C != A) {
+if (C != A) {
     dim3 dimGrid((n + TRANS_TILE_DIM - 1) / TRANS_TILE_DIM, (m + TRANS_TILE_DIM - 1) / TRANS_TILE_DIM, 1);
     dim3 dimBlock(TRANS_TILE_DIM, BLOCK_ROWS, 1);
@@ -73,7 +92,7 @@ hipblasStatus_t hipblasCopyHelper(hipStream_t stream, hipblasHandle_t, int n, co
 }

 hipblasStatus_t hipblasCopyHelper(hipStream_t stream, hipblasHandle_t, int n, const onnxruntime::BFloat16* x, int incx,
-                                  onnxruntime::BFloat16* y, int incy) {
+                                   onnxruntime::BFloat16* y, int incy) {
   dim3 dimGrid((unsigned int)(n + COPY_BLOCK_DIM - 1) / COPY_BLOCK_DIM, 1, 1);
   dim3 dimBlock(COPY_BLOCK_DIM, 1, 1);
   CopyVectorBFloat16<<<dimGrid, dimBlock, 0, stream>>>(x, incx, y, incy, n);
diff --git a/onnxruntime/core/providers/rocm/rocm_profiler.h b/onnxruntime/core/providers/rocm/rocm_profiler.h
index d5c7e3f273565..52c6d4ea05f99 100644
--- a/onnxruntime/core/providers/rocm/rocm_profiler.h
+++ b/onnxruntime/core/providers/rocm/rocm_profiler.h
@@ -34,9 +34,9 @@ class RocmProfiler final : public EpProfiler {
   ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(RocmProfiler);
   ~RocmProfiler() {}
   bool StartProfiling(TimePoint) override { return true; }
-  void EndProfiling(TimePoint, Events&) override {};
-  void Start(uint64_t) override{};
-  void Stop(uint64_t) override{};
+  void EndProfiling(TimePoint, Events&) override {}
+  void Start(uint64_t) override {}
+  void Stop(uint64_t) override {}
 };

 }  // namespace profiling
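The new guard exists because the tiled transpose launches one grid row per TRANS_TILE_DIM chunk of m, which can exceed the device's maxGridSize[1] for very tall matrices. A hypothetical call site showing the intended check-then-launch pattern (the error-code fallback is illustrative, not the real caller):

// Hypothetical caller: only launch the tiled transpose when the grid fits;
// otherwise report failure so the caller can pick a different path.
hipblasStatus_t TransposeIfPossible(hipStream_t stream, hipblasHandle_t handle,
                                    int m, int n, const half* A, half* C) {
  if (!CanUse_hipblasTransposeHelper_MLFloat16(m, n)) {
    return HIPBLAS_STATUS_INVALID_VALUE;  // grid would overflow maxGridSize[1]
  }
  // Unused alpha/beta/B and leading-dimension arguments mirror the helper's signature above.
  return hipblasTransposeHelper(stream, handle, HIPBLAS_OP_T, HIPBLAS_OP_T,
                                m, n, nullptr, A, 0, nullptr, nullptr, 0, C, 0);
}

diff --git a/onnxruntime/core/providers/rocm/shared_inc/fpgeneric.h b/onnxruntime/core/providers/rocm/shared_inc/fpgeneric.h
index 675b30612065b..39d5306b15b7e 100644
---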
a/onnxruntime/core/providers/rocm/shared_inc/fpgeneric.h +++ b/onnxruntime/core/providers/rocm/shared_inc/fpgeneric.h @@ -955,3 +955,5 @@ inline rocblas_status rocblasGemmStridedBatchedHelper(rocblas_handle handle, C, ldc, strideC, batchCount); } +bool CanUse_hipblasTransposeHelper_MLFloat16(int m, int n); +hipblasStatus_t hipblasTransposeHelper(hipStream_t stream, rocblas_handle, rocblas_operation, rocblas_operation, int m, int n, const half*, const half* A, int, const half*, const half*, int, half* C, int); diff --git a/onnxruntime/core/providers/shared_library/provider_api.h b/onnxruntime/core/providers/shared_library/provider_api.h index 45f81ed22b7f7..6ff2572e5e668 100644 --- a/onnxruntime/core/providers/shared_library/provider_api.h +++ b/onnxruntime/core/providers/shared_library/provider_api.h @@ -9,6 +9,11 @@ #pragma once #define SHARED_PROVIDER 1 +#ifdef _WIN32 +#include +#include +#endif // defined(_WIN32) + #include #include #include @@ -136,6 +141,17 @@ enum class DataType { USER = 1 ///< Contains potentially sensitive user data. }; +enum class ORTTraceLoggingKeyword : uint64_t { + Session = 0x1, // ORT Session TraceLoggingWrite + Logs = 0x2, // LOGS() Macro ORT logs. Pair with an appropriate level depending on detail required + Reserved1 = 0x4, // Reserved if we want to add some specific sub-categories instead of just LOGS() or other uses + Reserved2 = 0x8, + Reserved3 = 0x10, + Reserved4 = 0x20, + Reserved5 = 0x40, + Reserved6 = 0x80, + Profiling = 0x100 // Enables profiling. At higher levels >5 can impact inference performance +}; } // namespace logging // OnnxRuntime Types (these are the internal types) @@ -143,6 +159,13 @@ struct CPUIDInfo; namespace logging { struct Logger; struct Capture; +#ifdef _WIN32 +struct EtwRegistrationManager; +using EtwRegistrationManager_EtwInternalCallback = std::function; +#endif } // namespace logging struct ComputeCapability; struct ConfigOptions; @@ -157,10 +180,12 @@ struct KernelRegistry; struct Function; struct Graph; class GraphViewer; +struct ConstGraphNodes; enum class DataLayout; struct Model; struct Path; struct Node; +struct Node_EdgeEnd; struct NodeArg; struct NodeAttributes; struct NodeUnitIODef; @@ -215,6 +240,7 @@ using DeleteFunc = void (*)(void*); using NodeArgInfo = ONNX_NAMESPACE::ValueInfoProto; using NameMLValMap = std::unordered_map; + } // namespace onnxruntime #include "core/platform/threadpool.h" @@ -368,6 +394,28 @@ template <> constexpr ONNXTensorElementDataType GetONNXTensorElementDataType() { return ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT4; } + +inline std::vector> +CreateSupportedPartitions(const GraphViewer& graph_viewer, + const std::unordered_set& supported_nodes, + const std::unordered_set& stop_ops, + const std::function& generate_metadef_name, + const std::string& execution_provider_name, + const std::string& execution_provider_type, + const std::unordered_map* node_unit_map, + bool drop_constant_initializers = false) { + return g_host->Utils__CreateSupportedPartitions(graph_viewer, supported_nodes, stop_ops, generate_metadef_name, + execution_provider_name, execution_provider_type, node_unit_map, + drop_constant_initializers); +} +inline std::unique_ptr MakeComputeCapability(const GraphViewer& graph_viewer, + const std::vector& group, + const std::function& generate_metadef_name, + const std::string& execution_provider_name, + bool drop_constant_initializers) { + return g_host->Utils__MakeComputeCapability(graph_viewer, group, generate_metadef_name, + execution_provider_name, drop_constant_initializers); +} } 
// namespace utils namespace QDQ { @@ -381,6 +429,10 @@ GetAllNodeUnits(const GraphViewer* graph_viewer, const logging::Logger& logger) // So the C API (and C++) becomes available when ORT_API_MANUAL_INIT is used. void InitProviderOrtApi(); +// This is a replacement for Env::Default(). Returns a reference to the default ORT Environment. +inline Env& GetDefaultEnv() { + return g_host->Env__Default(); +} } // namespace onnxruntime #define CREATE_MESSAGE(logger, severity, category, datatype) \ diff --git a/onnxruntime/core/providers/shared_library/provider_bridge_provider.cc b/onnxruntime/core/providers/shared_library/provider_bridge_provider.cc index aa8c367d25d51..4c050534456da 100644 --- a/onnxruntime/core/providers/shared_library/provider_bridge_provider.cc +++ b/onnxruntime/core/providers/shared_library/provider_bridge_provider.cc @@ -505,6 +505,9 @@ Status UnpackInitializerData(const ONNX_NAMESPACE::TensorProto& tensor, const st /*out*/ std::vector& unpacked_tensor) { return g_host->UnpackInitializerData(tensor, model_path, unpacked_tensor); } +Status UnpackInitializerData(const ONNX_NAMESPACE::TensorProto& tensor, /*out*/ std::vector& unpacked_tensor) { + return g_host->UnpackInitializerData(tensor, std::filesystem::path(), unpacked_tensor); +} } // namespace utils @@ -788,5 +791,5 @@ std::string ToUTF8String(const std::wstring& s) { std::wstring ToWideString(const std::string& s) { return g_host->ToWideString(s); } -#endif +#endif // _WIN32 } // namespace onnxruntime diff --git a/onnxruntime/core/providers/shared_library/provider_interfaces.h b/onnxruntime/core/providers/shared_library/provider_interfaces.h index 5a179ec622f8c..a1bb86598ebc0 100644 --- a/onnxruntime/core/providers/shared_library/provider_interfaces.h +++ b/onnxruntime/core/providers/shared_library/provider_interfaces.h @@ -120,11 +120,20 @@ struct Node__EdgeIterator { virtual bool operator!=(const Node__EdgeIterator& p) const = 0; virtual void operator++() = 0; + virtual const Node_EdgeEnd& operator*() const = 0; virtual const Node& GetNode() const = 0; virtual int GetSrcArgIndex() const = 0; virtual int GetDstArgIndex() const = 0; }; +struct ConstGraphNodes_Iterator { + virtual ~ConstGraphNodes_Iterator() {} + + virtual bool operator!=(const ConstGraphNodes_Iterator& other) const = 0; + virtual void operator++() = 0; + virtual const Node& operator*() = 0; +}; + // There are two ways to route a function, one is a virtual method and the other is a function pointer (or pointer to // member function). 
// The function pointers are nicer in that they directly call the target function, but they cannot be used in cases @@ -169,7 +178,6 @@ struct ProviderHost { virtual std::string demangle(const char* name) = 0; virtual std::string demangle(const std::string& name) = 0; -#ifdef USE_CUDA virtual std::unique_ptr CreateCUDAAllocator(int16_t device_id, const char* name) = 0; virtual std::unique_ptr CreateCUDAPinnedAllocator(const char* name) = 0; virtual std::unique_ptr CreateGPUDataTransfer() = 0; @@ -181,7 +189,6 @@ struct ProviderHost { virtual Status CudaCall_false(int retCode, const char* exprString, const char* libName, int successCode, const char* msg, const char* file, const int line) = 0; virtual void CudaCall_true(int retCode, const char* exprString, const char* libName, int successCode, const char* msg, const char* file, const int line) = 0; -#endif #ifdef USE_MIGRAPHX virtual std::unique_ptr CreateMIGraphXAllocator(int16_t device_id, const char* name) = 0; @@ -191,7 +198,6 @@ struct ProviderHost { #ifdef USE_ROCM virtual std::unique_ptr CreateROCMAllocator(int16_t device_id, const char* name) = 0; virtual std::unique_ptr CreateROCMPinnedAllocator(const char* name) = 0; - virtual std::unique_ptr CreateGPUDataTransfer() = 0; virtual void rocm__Impl_Cast(void* stream, const int64_t* input_data, int32_t* output_data, size_t count) = 0; virtual void rocm__Impl_Cast(void* stream, const int32_t* input_data, int64_t* output_data, size_t count) = 0; @@ -273,20 +279,41 @@ struct ProviderHost { // logging::Logger virtual bool logging__Logger__OutputIsEnabled(const logging::Logger* p, logging::Severity severity, logging::DataType data_type) = 0; + virtual logging::Severity logging__Logger__GetSeverity(const logging::Logger* p) = 0; // logging::LoggingManager virtual const logging::Logger& logging__LoggingManager__DefaultLogger() = 0; + virtual bool logging__LoggingManager__HasDefaultLogger() = 0; // logging::Capture - virtual std::unique_ptr logging__Capture__construct(const logging::Logger& logger, logging::Severity severity, const char* category, logging::DataType dataType, const CodeLocation& location) = 0; + virtual std::unique_ptr logging__Capture__construct(const logging::Logger& logger, + logging::Severity severity, + const char* category, + logging::DataType data_type, + const CodeLocation& location) = 0; virtual void logging__Capture__operator_delete(logging::Capture* p) noexcept = 0; virtual std::ostream& logging__Capture__Stream(logging::Capture* p) noexcept = 0; + virtual void logging__Capture__ProcessPrintf(logging::Capture* p, const char* format, va_list args) = 0; + +#if defined(_WIN32) + // logging::EtwRegistrationManager + virtual logging::EtwRegistrationManager& logging__EtwRegistrationManager__Instance() = 0; + virtual bool logging__EtwRegistrationManager__SupportsETW() = 0; + virtual logging::Severity logging__EtwRegistrationManager__MapLevelToSeverity(logging::EtwRegistrationManager* p) = 0; + virtual void logging__EtwRegistrationManager__RegisterInternalCallback( + logging::EtwRegistrationManager* p, + const logging::EtwRegistrationManager_EtwInternalCallback& callback) = 0; + virtual void logging__EtwRegistrationManager__UnregisterInternalCallback( + logging::EtwRegistrationManager* p, + const logging::EtwRegistrationManager_EtwInternalCallback& callback) = 0; +#endif // defined(_WIN32) // Env virtual Env& Env__Default() = 0; // Utils::DataTypeUtils virtual const std::string* Utils__DataTypeUtils__ToType(const ONNX_NAMESPACE::TypeProto& type_proto) = 0; + virtual const 
std::string* Utils__DataTypeUtils__ToType(const std::string& type_str) = 0; // int64s virtual int int64s__size(const ONNX_NAMESPACE::int64s* p) = 0; @@ -328,6 +355,7 @@ struct ProviderHost { virtual bool TypeProto_Tensor__has_shape(const ONNX_NAMESPACE::TypeProto_Tensor* p) = 0; virtual const ONNX_NAMESPACE::TensorShapeProto& TypeProto_Tensor__shape(const ONNX_NAMESPACE::TypeProto_Tensor* p) = 0; virtual ONNX_NAMESPACE::TensorShapeProto* TypeProto_Tensor__mutable_shape(ONNX_NAMESPACE::TypeProto_Tensor* p) = 0; + virtual bool TypeProto_Tensor__has_elem_type(const ONNX_NAMESPACE::TypeProto_Tensor* p) = 0; virtual int32_t TypeProto_Tensor__elem_type(const ONNX_NAMESPACE::TypeProto_Tensor* p) = 0; virtual void TypeProto_Tensor__set_elem_type(ONNX_NAMESPACE::TypeProto_Tensor* p, int32_t value) = 0; @@ -342,6 +370,7 @@ struct ProviderHost { // TypeProto virtual std::unique_ptr TypeProto__construct() = 0; virtual void TypeProto__CopyFrom(ONNX_NAMESPACE::TypeProto* p, const ONNX_NAMESPACE::TypeProto* other) = 0; + virtual bool TypeProto__has_tensor_type(const ONNX_NAMESPACE::TypeProto* p) = 0; virtual const ONNX_NAMESPACE::TypeProto_Tensor& TypeProto__tensor_type(const ONNX_NAMESPACE::TypeProto* p) = 0; virtual ONNX_NAMESPACE::TypeProto_Tensor* TypeProto__mutable_tensor_type(ONNX_NAMESPACE::TypeProto* p) = 0; @@ -462,6 +491,7 @@ struct ProviderHost { virtual bool TensorProto__has_raw_data(const ONNX_NAMESPACE::TensorProto* p) = 0; virtual const std::string& TensorProto__raw_data(const ONNX_NAMESPACE::TensorProto* p) = 0; virtual std::string* TensorProto__mutable_raw_data(ONNX_NAMESPACE::TensorProto* p) = 0; + virtual bool TensorProto__has_data_type(const ONNX_NAMESPACE::TensorProto* p) = 0; virtual int32_t TensorProto__data_type(const ONNX_NAMESPACE::TensorProto* p) = 0; virtual void TensorProto__set_data_type(ONNX_NAMESPACE::TensorProto* p, int32_t type) = 0; virtual void TensorProto__CopyFrom(ONNX_NAMESPACE::TensorProto* p, const ONNX_NAMESPACE::TensorProto* other) = 0; @@ -495,6 +525,7 @@ struct ProviderHost { // TensorShapeProto_Dimensions virtual std::unique_ptr TensorShapeProto_Dimensions__begin(const ONNX_NAMESPACE::TensorShapeProto_Dimensions* p) = 0; virtual std::unique_ptr TensorShapeProto_Dimensions__end(const ONNX_NAMESPACE::TensorShapeProto_Dimensions* p) = 0; + virtual size_t TensorShapeProto_Dimensions__size(const ONNX_NAMESPACE::TensorShapeProto_Dimensions* p) = 0; // TensorShapeProto virtual int TensorShapeProto__dim_size(const ONNX_NAMESPACE::TensorShapeProto* p) = 0; @@ -823,6 +854,8 @@ struct ProviderHost { virtual const NodeAttributes& Node__GetAttributes(const Node* p) noexcept = 0; virtual void Node__AddAttribute(Node* p, const ::std::string& attr_name, const ONNX_NAMESPACE::GraphProto& value) = 0; + virtual void Node__AddAttribute(Node* p, const ::std::string& attr_name, const std::string& value) = 0; + virtual void Node__AddAttribute(Node* p, const ::std::string& attr_name, int64_t value) = 0; virtual size_t Node__GetInputEdgesCount(const Node* p) noexcept = 0; virtual size_t Node__GetOutputEdgesCount(const Node* p) noexcept = 0; @@ -842,6 +875,11 @@ struct ProviderHost { virtual const std::unordered_map>& Node__GetAttributeNameToMutableSubgraphMap(Node* p) = 0; virtual std::unordered_map> Node__GetAttributeNameToSubgraphMap(const Node* p) const = 0; + // Node_EdgeEnd + virtual const Node& Node_EdgeEnd__GetNode(const Node_EdgeEnd* p) = 0; + virtual int Node_EdgeEnd__GetSrcArgIndex(const Node_EdgeEnd* p) = 0; + virtual int Node_EdgeEnd__GetDstArgIndex(const Node_EdgeEnd* p) 
= 0; + // NodeArg virtual const std::string& NodeArg__Name(const NodeArg* p) noexcept = 0; virtual const ONNX_NAMESPACE::TensorShapeProto* NodeArg__Shape(const NodeArg* p) = 0; @@ -872,6 +910,8 @@ struct ProviderHost { virtual void NodeAttributes__reserve(NodeAttributes* p, size_t size) = 0; // NodeUnit + virtual void NodeUnit__operator_delete(NodeUnit* p) noexcept = 0; + virtual int NodeUnit__UnitType(const NodeUnit* p) noexcept = 0; virtual const std::vector& NodeUnit__Inputs(const NodeUnit* p) noexcept = 0; @@ -897,10 +937,29 @@ struct ProviderHost { virtual std::pair>, std::unordered_map> QDQ__GetAllNodeUnits(const GraphViewer* graph_viewer, const logging::Logger& logger) = 0; + // Partitioning utils + virtual std::vector> + Utils__CreateSupportedPartitions(const GraphViewer& graph_viewer, + const std::unordered_set& supported_nodes, + const std::unordered_set& stop_ops, + const std::function& generate_metadef_name, + const std::string& execution_provider_name, + const std::string& execution_provider_type, + const std::unordered_map* node_unit_map, + bool drop_constant_initializers) = 0; + + virtual std::unique_ptr + Utils__MakeComputeCapability(const GraphViewer& graph_viewer, + const std::vector& group, + const std::function& generate_metadef_name, + const std::string& execution_provider_name, + bool drop_constant_initializers) = 0; // Model virtual std::unique_ptr Model__construct(ONNX_NAMESPACE::ModelProto&& model_proto, const PathString& model_path, const IOnnxRuntimeOpSchemaRegistryList* local_registries, const logging::Logger& logger) = 0; + virtual std::unique_ptr Model__construct(const std::string& graph_name, bool is_onnx_domain_only, + const logging::Logger& logger) = 0; virtual void Model__operator_delete(Model* p) = 0; virtual Graph& Model__MainGraph(Model* p) = 0; virtual std::unique_ptr Model__ToProto(Model* p) = 0; @@ -974,6 +1033,7 @@ struct ProviderHost { virtual const std::string& GraphViewer__Name(const GraphViewer* p) noexcept = 0; virtual const std::filesystem::path& GraphViewer__ModelPath(const GraphViewer* p) noexcept = 0; + virtual const ConstGraphNodes& GraphViewer__Nodes(const GraphViewer* p) noexcept = 0; virtual const Node* GraphViewer__GetNode(const GraphViewer* p, NodeIndex node_index) = 0; virtual const NodeArg* GraphViewer__GetNodeArg(const GraphViewer* p, const std::string& name) = 0; @@ -989,6 +1049,7 @@ struct ProviderHost { virtual const std::vector& GraphViewer__GetInputs(const GraphViewer* p) noexcept = 0; virtual const std::vector& GraphViewer__GetOutputs(const GraphViewer* p) noexcept = 0; + virtual bool GraphViewer__NodeProducesGraphOutput(const GraphViewer* p, const Node& node) = 0; virtual const std::unordered_set& GraphViewer__GetValueInfo(const GraphViewer* p) noexcept = 0; virtual const InitializedTensorSet& GraphViewer__GetAllInitializedTensors(const GraphViewer* p) = 0; @@ -1007,6 +1068,13 @@ struct ProviderHost { virtual const Node* GraphViewer__GetProducerNode(const GraphViewer* p, const std::string& node_arg_name) const = 0; virtual IOnnxRuntimeOpSchemaCollectionPtr GraphViewer__GetSchemaRegistry(const GraphViewer* p) const = 0; + // ConstGraphNodes + virtual std::unique_ptr ConstGraphNodes__begin(const ConstGraphNodes* p) = 0; + virtual std::unique_ptr ConstGraphNodes__end(const ConstGraphNodes* p) = 0; + virtual std::unique_ptr ConstGraphNodes__cbegin(const ConstGraphNodes* p) = 0; + virtual std::unique_ptr ConstGraphNodes__cend(const ConstGraphNodes* p) = 0; + virtual bool ConstGraphNodes__empty(const ConstGraphNodes* p) noexcept = 
0; + // OpKernel virtual const Node& OpKernel__Node(const OpKernel* p) = 0; @@ -1185,9 +1253,7 @@ struct ProviderHost { virtual training::DistributedRunContext& GetDistributedRunContextInstance() = 0; #endif -#if defined(USE_CUDA) || defined(USE_ROCM) virtual PhiloxGenerator& PhiloxGenerator__Default() = 0; -#endif #ifdef ENABLE_TRAINING_TORCH_INTEROP virtual void contrib__PythonOpBase__Init(contrib::PythonOpBase* p, const OpKernelInfo& info) = 0; diff --git a/onnxruntime/core/providers/shared_library/provider_wrappedtypes.h b/onnxruntime/core/providers/shared_library/provider_wrappedtypes.h index 76b6d8063fd66..e434935343663 100644 --- a/onnxruntime/core/providers/shared_library/provider_wrappedtypes.h +++ b/onnxruntime/core/providers/shared_library/provider_wrappedtypes.h @@ -23,27 +23,50 @@ namespace logging { struct Logger final { bool OutputIsEnabled(Severity severity, DataType data_type) const noexcept { return g_host->logging__Logger__OutputIsEnabled(this, severity, data_type); } + Severity GetSeverity() const noexcept { + return g_host->logging__Logger__GetSeverity(this); + } PROVIDER_DISALLOW_ALL(Logger) }; struct LoggingManager final { static const Logger& DefaultLogger() { return g_host->logging__LoggingManager__DefaultLogger(); } + static bool HasDefaultLogger() { return g_host->logging__LoggingManager__HasDefaultLogger(); } PROVIDER_DISALLOW_ALL(LoggingManager) }; struct Capture final { static std::unique_ptr Create(const Logger& logger, logging::Severity severity, const char* category, - logging::DataType dataType, const CodeLocation& location) { return g_host->logging__Capture__construct(logger, severity, category, dataType, location); } + logging::DataType data_type, const CodeLocation& location) { + return g_host->logging__Capture__construct(logger, severity, category, data_type, location); + } static void operator delete(void* p) { g_host->logging__Capture__operator_delete(reinterpret_cast(p)); } std::ostream& Stream() noexcept { return g_host->logging__Capture__Stream(this); } + void ProcessPrintf(const char* format, va_list args) { g_host->logging__Capture__ProcessPrintf(this, format, args); } Capture() = delete; Capture(const Capture&) = delete; void operator=(const Capture&) = delete; }; + +#if defined(_WIN32) +struct EtwRegistrationManager final { + using EtwInternalCallback = EtwRegistrationManager_EtwInternalCallback; + static EtwRegistrationManager& Instance() { return g_host->logging__EtwRegistrationManager__Instance(); } + static bool SupportsETW() { return g_host->logging__EtwRegistrationManager__SupportsETW(); } + Severity MapLevelToSeverity() { return g_host->logging__EtwRegistrationManager__MapLevelToSeverity(this); } + void RegisterInternalCallback(const EtwInternalCallback& callback) { + g_host->logging__EtwRegistrationManager__RegisterInternalCallback(this, callback); + } + void UnregisterInternalCallback(const EtwInternalCallback& callback) { + g_host->logging__EtwRegistrationManager__UnregisterInternalCallback(this, callback); + } +}; +#endif // defined(_WIN32) + } // namespace logging } // namespace onnxruntime @@ -234,6 +257,7 @@ struct TensorProto final { const std::string& raw_data() const { return g_host->TensorProto__raw_data(this); } std::string* mutable_raw_data() { return g_host->TensorProto__mutable_raw_data(this); } + bool has_data_type() const { return g_host->TensorProto__has_data_type(this); } int32_t data_type() const { return g_host->TensorProto__data_type(this); } void set_data_type(int32_t type) { return 
g_host->TensorProto__set_data_type(this, type); } @@ -286,6 +310,7 @@ struct TensorShapeProto_Dimension final { struct TensorShapeProto_Dimensions final { IteratorHolder begin() const { return g_host->TensorShapeProto_Dimensions__begin(this); } IteratorHolder end() const { return g_host->TensorShapeProto_Dimensions__end(this); } + size_t size() const { return g_host->TensorShapeProto_Dimensions__size(this); } PROVIDER_DISALLOW_ALL(TensorShapeProto_Dimensions) }; @@ -305,6 +330,7 @@ struct TypeProto_Tensor final { bool has_shape() const { return g_host->TypeProto_Tensor__has_shape(this); } const TensorShapeProto& shape() const { return g_host->TypeProto_Tensor__shape(this); } TensorShapeProto* mutable_shape() { return g_host->TypeProto_Tensor__mutable_shape(this); } + bool has_elem_type() const { return g_host->TypeProto_Tensor__has_elem_type(this); } int32_t elem_type() const { return g_host->TypeProto_Tensor__elem_type(this); } void set_elem_type(int32_t value) { g_host->TypeProto_Tensor__set_elem_type(this, value); } @@ -339,6 +365,7 @@ struct TypeProto_Sequence final { struct TypeProto final { static std::unique_ptr Create() { return g_host->TypeProto__construct(); } + bool has_tensor_type() const { return g_host->TypeProto__has_tensor_type(this); } const TypeProto_Tensor& tensor_type() const { return g_host->TypeProto__tensor_type(this); } TypeProto_Tensor* mutable_tensor_type() { return g_host->TypeProto__mutable_tensor_type(this); } @@ -475,6 +502,7 @@ namespace Utils { struct DataTypeUtils final { static const std::string* ToType(const ONNX_NAMESPACE::TypeProto& type_proto) { return g_host->Utils__DataTypeUtils__ToType(type_proto); } + static const std::string* ToType(const std::string& type_str) { return g_host->Utils__DataTypeUtils__ToType(type_str); } PROVIDER_DISALLOW_ALL(DataTypeUtils) }; @@ -770,6 +798,14 @@ struct Function final { PROVIDER_DISALLOW_ALL(Function) }; +struct Node_EdgeEnd final { + const Node& GetNode() const { return g_host->Node_EdgeEnd__GetNode(this); } + int GetSrcArgIndex() const { return g_host->Node_EdgeEnd__GetSrcArgIndex(this); } + int GetDstArgIndex() const { return g_host->Node_EdgeEnd__GetDstArgIndex(this); } + + PROVIDER_DISALLOW_ALL(Node_EdgeEnd) +}; + struct Node final { enum class Type { Primitive = 0, @@ -801,6 +837,12 @@ struct Node final { void AddAttribute(const ::std::string& attr_name, const ONNX_NAMESPACE::GraphProto& value) { g_host->Node__AddAttribute(this, attr_name, value); } + void AddAttribute(const std::string& attr_name, const std::string& value) { + g_host->Node__AddAttribute(this, attr_name, value); + } + void AddAttribute(const std::string& attr_name, int64_t value) { + g_host->Node__AddAttribute(this, attr_name, value); + } size_t GetInputEdgesCount() const noexcept { return g_host->Node__GetInputEdgesCount(this); } size_t GetOutputEdgesCount() const noexcept { return g_host->Node__GetOutputEdgesCount(this); } @@ -832,6 +874,7 @@ struct Node final { } void operator++() { impl_->operator++(); } + const Node_EdgeEnd& operator*() { return impl_->operator*(); } const Node__EdgeIterator* operator->() const { return impl_.get(); } std::unique_ptr impl_; @@ -906,6 +949,13 @@ struct NodeUnit final { QDQGroup, // The NodeUnit contain a QDQ group of nodes, such as "DQ->Sigmoid->Q" }; + NodeUnit() = delete; + NodeUnit(const NodeUnit&) = delete; + void operator=(const NodeUnit& v) = delete; + + // Need delete because of APIs that return unique_ptr + static void operator delete(void* p) { 
g_host->NodeUnit__operator_delete(reinterpret_cast(p)); } + Type UnitType() const noexcept { return static_cast(g_host->NodeUnit__UnitType(this)); } const std::vector& Inputs() const noexcept { return g_host->NodeUnit__Inputs(this); } @@ -941,6 +991,9 @@ struct Model final { const IOnnxRuntimeOpSchemaRegistryList* local_registries, const logging::Logger& logger) { return g_host->Model__construct(std::move(model_proto), model_path, local_registries, logger); } + static std::unique_ptr Create(const std::string& graph_name, bool is_onnx_domain_only, const logging::Logger& logger) { + return g_host->Model__construct(graph_name, is_onnx_domain_only, logger); + } static void operator delete(void* p) { g_host->Model__operator_delete(reinterpret_cast(p)); } static Status Load(const PathString& file_path, /*out*/ ONNX_NAMESPACE::ModelProto& model_proto) { return g_host->Model__Load(file_path, model_proto); } @@ -1041,6 +1094,7 @@ class GraphViewer final { const std::string& Name() const noexcept { return g_host->GraphViewer__Name(this); } const std::filesystem::path& ModelPath() const noexcept { return g_host->GraphViewer__ModelPath(this); } + const ConstGraphNodes& Nodes() const noexcept { return g_host->GraphViewer__Nodes(this); } const Node* GetNode(NodeIndex node_index) const { return g_host->GraphViewer__GetNode(this, node_index); } const NodeArg* GetNodeArg(const std::string& name) const { return g_host->GraphViewer__GetNodeArg(this, name); } @@ -1058,6 +1112,9 @@ class GraphViewer final { const std::vector& GetInputs() const noexcept { return g_host->GraphViewer__GetInputs(this); } const std::vector& GetOutputs() const noexcept { return g_host->GraphViewer__GetOutputs(this); } + bool NodeProducesGraphOutput(const Node& node) const { + return g_host->GraphViewer__NodeProducesGraphOutput(this, node); + } const std::unordered_set& GetValueInfo() const noexcept { return g_host->GraphViewer__GetValueInfo(this); } const InitializedTensorSet& GetAllInitializedTensors() const noexcept { return g_host->GraphViewer__GetAllInitializedTensors(this); } @@ -1085,6 +1142,25 @@ class GraphViewer final { void operator=(const GraphViewer&) = delete; }; +struct ConstGraphNodes final { + IteratorHolder begin() const { + return g_host->ConstGraphNodes__begin(this); + } + IteratorHolder end() const { + return g_host->ConstGraphNodes__end(this); + } + IteratorHolder cbegin() const { + return g_host->ConstGraphNodes__cbegin(this); + } + IteratorHolder cend() const { + return g_host->ConstGraphNodes__cend(this); + } + + bool empty() const noexcept { return g_host->ConstGraphNodes__empty(this); } + + PROVIDER_DISALLOW_ALL(ConstGraphNodes) +}; + struct OpKernelContext final { template const T& RequiredInput(int index) const; diff --git a/onnxruntime/core/providers/vitisai/vitisai_profiler.h b/onnxruntime/core/providers/vitisai/vitisai_profiler.h index aedbda31f7b1d..afe4058f7290a 100644 --- a/onnxruntime/core/providers/vitisai/vitisai_profiler.h +++ b/onnxruntime/core/providers/vitisai/vitisai_profiler.h @@ -14,8 +14,8 @@ class VitisaiProfiler final : public EpProfiler { ~VitisaiProfiler() {} bool StartProfiling(TimePoint) override; void EndProfiling(TimePoint, Events&) override; - void Start(uint64_t) override{}; - void Stop(uint64_t) override{}; + void Start(uint64_t) override {} + void Stop(uint64_t) override {} }; #endif diff --git a/onnxruntime/core/providers/webgpu/allocator.cc b/onnxruntime/core/providers/webgpu/allocator.cc index 8e27acdc285d4..51a06ad7643f6 100644 --- 
a/onnxruntime/core/providers/webgpu/allocator.cc +++ b/onnxruntime/core/providers/webgpu/allocator.cc @@ -1,10 +1,6 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. -#ifdef __EMSCRIPTEN__ -#include -#endif - #include "core/framework/session_state.h" #include "core/providers/webgpu/allocator.h" #include "core/providers/webgpu/webgpu_context.h" diff --git a/onnxruntime/core/providers/webgpu/buffer_manager.cc b/onnxruntime/core/providers/webgpu/buffer_manager.cc index 233bb24083289..1c2ff3f91a5a1 100644 --- a/onnxruntime/core/providers/webgpu/buffer_manager.cc +++ b/onnxruntime/core/providers/webgpu/buffer_manager.cc @@ -85,7 +85,7 @@ class SimpleCacheManager : public IBufferCacheManager { void OnRefresh() override { for (auto& buffer : pending_buffers_) { - buffers_[wgpuBufferGetSize(buffer)].push_back(buffer); + buffers_[static_cast(wgpuBufferGetSize(buffer))].push_back(buffer); } pending_buffers_.clear(); } @@ -167,7 +167,7 @@ class BucketCacheManager : public IBufferCacheManager { // TODO: consider graph capture. currently not supported for (auto& buffer : pending_buffers_) { - auto buffer_size = wgpuBufferGetSize(buffer); + auto buffer_size = static_cast(wgpuBufferGetSize(buffer)); auto it = buckets_.find(buffer_size); if (it != buckets_.end() && it->second.size() < buckets_limit_[buffer_size]) { diff --git a/onnxruntime/core/providers/webgpu/buffer_manager.h b/onnxruntime/core/providers/webgpu/buffer_manager.h index 00febfbc29f1b..20bee52835c02 100644 --- a/onnxruntime/core/providers/webgpu/buffer_manager.h +++ b/onnxruntime/core/providers/webgpu/buffer_manager.h @@ -5,10 +5,6 @@ #include -#ifdef __EMSCRIPTEN__ -#include -#endif - #include #include "core/framework/execution_provider.h" diff --git a/onnxruntime/core/providers/webgpu/compute_context.h b/onnxruntime/core/providers/webgpu/compute_context.h index b7ea8a58e232b..680e03aef0aa3 100644 --- a/onnxruntime/core/providers/webgpu/compute_context.h +++ b/onnxruntime/core/providers/webgpu/compute_context.h @@ -3,10 +3,6 @@ #pragma once -#ifdef __EMSCRIPTEN__ -#include -#endif - #include #include @@ -41,6 +37,9 @@ class ComputeContext { inline const wgpu::Limits& DeviceLimits() const { return webgpu_context_.DeviceLimits(); } + inline const wgpu::Device& Device() const { + return webgpu_context_.Device(); + } // // Get the kernel context. diff --git a/onnxruntime/core/providers/webgpu/data_transfer.cc b/onnxruntime/core/providers/webgpu/data_transfer.cc index 615ae11175782..ac376b4fce069 100644 --- a/onnxruntime/core/providers/webgpu/data_transfer.cc +++ b/onnxruntime/core/providers/webgpu/data_transfer.cc @@ -1,10 +1,6 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. 
-#ifdef __EMSCRIPTEN__ -#include -#endif - #include "core/providers/webgpu/data_transfer.h" #include "core/providers/webgpu/webgpu_context.h" diff --git a/onnxruntime/core/providers/webgpu/generator/range.cc b/onnxruntime/core/providers/webgpu/generator/range.cc index ee7c67ec24185..a0b65f08a5b4e 100644 --- a/onnxruntime/core/providers/webgpu/generator/range.cc +++ b/onnxruntime/core/providers/webgpu/generator/range.cc @@ -25,6 +25,11 @@ Status Range::ComputeInternal(ComputeContext& context) const { uint32_t output_size = gsl::narrow(n); RangeProgram program{}; +#if defined(__GNUC__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wstrict-aliasing" +#endif + program.AddOutput({output_tensor, ProgramTensorMetadataDependency::Type}) .SetDispatchGroupSize((output_size + WORKGROUP_SIZE - 1) / WORKGROUP_SIZE) .AddUniformVariables({ @@ -33,6 +38,10 @@ Status Range::ComputeInternal(ComputeContext& context) const { *reinterpret_cast(&delta), }); +#if defined(__GNUC__) +#pragma GCC diagnostic pop +#endif + return context.RunProgram(program); } diff --git a/onnxruntime/core/providers/webgpu/math/binary_elementwise_ops.cc b/onnxruntime/core/providers/webgpu/math/binary_elementwise_ops.cc index ec7fed5a94036..75866513e2c7d 100644 --- a/onnxruntime/core/providers/webgpu/math/binary_elementwise_ops.cc +++ b/onnxruntime/core/providers/webgpu/math/binary_elementwise_ops.cc @@ -123,10 +123,10 @@ Status BinaryElementwise::ComputeInternal(ComputeContext& context) const { if (a_last_dim_divisible_by_4 || b_last_dim_divisible_by_4) { vectorize = true; } else { - size_t shared_dimension = 1; + int64_t shared_dimension = 1; for (size_t i = 1; i < output_shape.NumDimensions(); i++) { - size_t dimA = lhs_shape.NumDimensions() >= i ? lhs_shape[lhs_shape.NumDimensions() - i] : 1; - size_t dimB = rhs_shape.NumDimensions() >= i ? rhs_shape[rhs_shape.NumDimensions() - i] : 1; + int64_t dimA = lhs_shape.NumDimensions() >= i ? lhs_shape[lhs_shape.NumDimensions() - i] : 1; + int64_t dimB = rhs_shape.NumDimensions() >= i ? rhs_shape[rhs_shape.NumDimensions() - i] : 1; if (dimA == dimB) { shared_dimension *= dimA; num_shared_dimension++; diff --git a/onnxruntime/core/providers/webgpu/math/unary_elementwise_ops.cc b/onnxruntime/core/providers/webgpu/math/unary_elementwise_ops.cc index 8dcf63671092b..eaaad206ebaf5 100644 --- a/onnxruntime/core/providers/webgpu/math/unary_elementwise_ops.cc +++ b/onnxruntime/core/providers/webgpu/math/unary_elementwise_ops.cc @@ -194,6 +194,10 @@ class Clip final : public UnaryElementwise { "Clip", std::is_same_v ? ClipF16Impl : ClipImpl, "", ShaderUsage::UseElementTypeAlias} {} +#if defined(__GNUC__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wstrict-aliasing" +#endif Status ConfigureProgram(const ComputeContext& context, UnaryElementwiseProgram& program) const override { const auto* clip_min_tensor = context.Input(1); @@ -214,6 +218,9 @@ class Clip final : public UnaryElementwise { } return Status::OK(); } +#if defined(__GNUC__) +#pragma GCC diagnostic pop +#endif // uniforms.attr is a f32 value. It is encoded as a float for 2 f16 values. 
// bitcast>(uniforms.attr)[0] is clip_min, bitcast>(uniforms.attr)[1] is clip_max diff --git a/onnxruntime/core/providers/webgpu/nn/layer_norm.cc b/onnxruntime/core/providers/webgpu/nn/layer_norm.cc index 1ee771e945820..64172021e82f1 100644 --- a/onnxruntime/core/providers/webgpu/nn/layer_norm.cc +++ b/onnxruntime/core/providers/webgpu/nn/layer_norm.cc @@ -85,8 +85,7 @@ Status LayerNorm::ComputeInternal(onnxruntime::webgpu::ComputeContex auto* output = context.Output(0, x_shape); - size_t data_size = x_shape.Size(); - if (data_size == 0) { + if (x_shape.Size() == 0) { return Status::OK(); } diff --git a/onnxruntime/core/providers/webgpu/program.h b/onnxruntime/core/providers/webgpu/program.h index 1562ec158b40a..7bfd9e8800099 100644 --- a/onnxruntime/core/providers/webgpu/program.h +++ b/onnxruntime/core/providers/webgpu/program.h @@ -150,6 +150,11 @@ enum class ProgramTensorMetadataDependency : int { }; std::ostream& operator<<(std::ostream& os, ProgramTensorMetadataDependency); +#if defined(__GNUC__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wstrict-aliasing" +#endif + inline ProgramTensorMetadataDependency operator|(ProgramTensorMetadataDependency a, ProgramTensorMetadataDependency b) { return (ProgramTensorMetadataDependency)((int&)a | (int&)b); } @@ -163,6 +168,10 @@ inline ProgramTensorMetadataDependency& operator&=(ProgramTensorMetadataDependen return (ProgramTensorMetadataDependency&)((int&)a &= (int&)b); } +#if defined(__GNUC__) +#pragma GCC diagnostic pop +#endif + constexpr SafeInt WORKGROUP_SIZE = 64; // data type of variable diff --git a/onnxruntime/core/providers/webgpu/program_manager.cc b/onnxruntime/core/providers/webgpu/program_manager.cc index 109bac34d6503..1fdd312d4f0d8 100644 --- a/onnxruntime/core/providers/webgpu/program_manager.cc +++ b/onnxruntime/core/providers/webgpu/program_manager.cc @@ -147,16 +147,16 @@ Status ProgramManager::Build(const ProgramBase& program, } } - wgpu::ProgrammableStageDescriptor compute_stage{}; - compute_stage.module = shader_module; - compute_stage.entryPoint = "main"; + wgpu::ComputeState compute_state{}; + compute_state.module = shader_module; + compute_state.entryPoint = "main"; if (!constant_entries.empty()) { - compute_stage.constants = constant_entries.data(); - compute_stage.constantCount = constant_entries.size(); + compute_state.constants = constant_entries.data(); + compute_state.constantCount = constant_entries.size(); } wgpu::ComputePipelineDescriptor pipeline_descriptor{}; - pipeline_descriptor.compute = compute_stage; + pipeline_descriptor.compute = compute_state; #ifndef NDEBUG // if debug build pipeline_descriptor.label = program.Name().c_str(); #endif diff --git a/onnxruntime/core/providers/webgpu/program_manager.h b/onnxruntime/core/providers/webgpu/program_manager.h index eded1cfa17970..55721770014d2 100644 --- a/onnxruntime/core/providers/webgpu/program_manager.h +++ b/onnxruntime/core/providers/webgpu/program_manager.h @@ -3,10 +3,6 @@ #pragma once -#ifdef __EMSCRIPTEN__ -#include -#endif - #include #include diff --git a/onnxruntime/core/providers/webgpu/shader_helper.cc b/onnxruntime/core/providers/webgpu/shader_helper.cc index 5685494556248..49c9a84f69551 100644 --- a/onnxruntime/core/providers/webgpu/shader_helper.cc +++ b/onnxruntime/core/providers/webgpu/shader_helper.cc @@ -63,6 +63,11 @@ Status ShaderHelper::Init() { " @builtin(workgroup_id) workgroup_id : vec3,\n" " @builtin(local_invocation_index) local_idx : u32,\n" " @builtin(local_invocation_id) local_id : vec3"; + if 
(device_.HasFeature(wgpu::FeatureName::Subgroups)) { + body_ss_ << ",\n" + " @builtin(subgroup_invocation_id) sg_id : u32,\n" + " @builtin(subgroup_size) sg_size : u32"; + } if (!is_1d_dispatch) { body_ss_ << ",\n" " @builtin(num_workgroups) num_workgroups : vec3"; diff --git a/onnxruntime/core/providers/webgpu/shader_helper.h b/onnxruntime/core/providers/webgpu/shader_helper.h index a4b96edc63c74..dac08f3bd9368 100644 --- a/onnxruntime/core/providers/webgpu/shader_helper.h +++ b/onnxruntime/core/providers/webgpu/shader_helper.h @@ -5,10 +5,6 @@ #include -#ifdef __EMSCRIPTEN__ -#include -#endif - #include #include "core/framework/tensor_shape.h" diff --git a/onnxruntime/core/providers/webgpu/shader_variable.cc b/onnxruntime/core/providers/webgpu/shader_variable.cc index 15020b801c97d..5e5920f582251 100644 --- a/onnxruntime/core/providers/webgpu/shader_variable.cc +++ b/onnxruntime/core/providers/webgpu/shader_variable.cc @@ -159,10 +159,8 @@ void ShaderIndicesHelper::Impl(std::ostream& ss) const { SS_APPEND(ss, " var current = offset;\n"); for (int i = 0; i < rank_ - 1; i++) { auto current_stride = GetElementAt(stride, i, rank_ - 1); - SS_APPEND(ss, " let dim", i, " = current / ", current_stride, ";\n"); - SS_APPEND(ss, " let rest", i, " = current % ", current_stride, ";\n"); - SS_APPEND(ss, " indices[", i, "] = dim", i, ";\n"); - SS_APPEND(ss, " current = rest", i, ";\n"); + SS_APPEND(ss, " indices[", i, "] = current / ", current_stride, ";\n"); + SS_APPEND(ss, " current = current % ", current_stride, ";\n"); } SS_APPEND(ss, " indices[", rank_ - 1, "] = current;\n"); SS_APPEND(ss, " return indices;\n"); diff --git a/onnxruntime/core/providers/webgpu/shader_variable.h b/onnxruntime/core/providers/webgpu/shader_variable.h index 4c87bc9158890..2aba2a59d157f 100644 --- a/onnxruntime/core/providers/webgpu/shader_variable.h +++ b/onnxruntime/core/providers/webgpu/shader_variable.h @@ -189,6 +189,10 @@ class ShaderVariableHelper : public ShaderIndicesHelper { friend class ShaderHelper; }; +#if defined(__GNUC__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wstrict-aliasing" +#endif inline ShaderUsage operator|(ShaderUsage a, ShaderUsage b) { return (uint32_t)a.usage | (uint32_t)b.usage; @@ -205,6 +209,10 @@ inline ShaderUsage& operator&=(ShaderUsage& a, ShaderUsage b) { return a; } +#if defined(__GNUC__) +#pragma GCC diagnostic pop +#endif + namespace detail { template >> std::string pass_as_string(T&& v) { diff --git a/onnxruntime/core/providers/webgpu/tensor/concat.cc b/onnxruntime/core/providers/webgpu/tensor/concat.cc index c708f24dcc330..5ed8099fde05e 100644 --- a/onnxruntime/core/providers/webgpu/tensor/concat.cc +++ b/onnxruntime/core/providers/webgpu/tensor/concat.cc @@ -106,7 +106,8 @@ Status Concat::ComputeInternal(ComputeContext& context) const { uint32_t output_size = gsl::narrow_cast(prepare.output_tensor->Shape().Size()); - ConcatProgram program{prepare.axis}; + size_t axis = static_cast(prepare.axis); + ConcatProgram program{axis}; std::vector sizes_in_concat_axis; sizes_in_concat_axis.reserve(input_count); @@ -118,7 +119,7 @@ Status Concat::ComputeInternal(ComputeContext& context) const { } program.AddInput({input.tensor, ProgramTensorMetadataDependency::TypeAndRank}); - auto axis_size = input.tensor->Shape()[prepare.axis]; + auto axis_size = input.tensor->Shape()[axis]; sum += static_cast(axis_size); sizes_in_concat_axis.push_back(sum); } diff --git a/onnxruntime/core/providers/webgpu/tensor/flatten.h b/onnxruntime/core/providers/webgpu/tensor/flatten.h index 
5fc49a844b404..68f47eacd80a1 100644 --- a/onnxruntime/core/providers/webgpu/tensor/flatten.h +++ b/onnxruntime/core/providers/webgpu/tensor/flatten.h @@ -31,15 +31,8 @@ class Flatten final : public OpKernel { return Status(common::ONNXRUNTIME, common::FAIL, "Invalid value for axis, must be less than or equal to input_rank"); } - int64_t first_dim = 1; - for (int64_t i = 0; i < axis; i++) { - first_dim *= input_shape[i]; - } - - int64_t second_dim = 1; - for (int64_t i = axis; i < input_rank; i++) { - second_dim *= input_shape[i]; - } + int64_t first_dim = input_shape.SizeToDimension(static_cast(axis)); + int64_t second_dim = input_shape.SizeFromDimension(static_cast(axis)); TensorShape output_shape({first_dim, second_dim}); Tensor* output_tensor = context->Output(0, output_shape); @@ -59,4 +52,4 @@ class Flatten final : public OpKernel { }; } // namespace webgpu -} // namespace onnxruntime \ No newline at end of file +} // namespace onnxruntime diff --git a/onnxruntime/core/providers/webgpu/tensor/gather_elements.cc b/onnxruntime/core/providers/webgpu/tensor/gather_elements.cc index 00d8caf2624a9..4938cd7acfc16 100644 --- a/onnxruntime/core/providers/webgpu/tensor/gather_elements.cc +++ b/onnxruntime/core/providers/webgpu/tensor/gather_elements.cc @@ -59,7 +59,7 @@ Status GatherElements::ComputeInternal(ComputeContext& context) const { axis += input_rank; } - auto axis_dim_limit = input_shape[axis]; + auto axis_dim_limit = input_shape[static_cast(axis)]; auto output_dims = indices_shape.AsShapeVector(); TensorShape output_shape(output_dims); @@ -83,4 +83,4 @@ Status GatherElements::ComputeInternal(ComputeContext& context) const { } } // namespace webgpu -} // namespace onnxruntime \ No newline at end of file +} // namespace onnxruntime diff --git a/onnxruntime/core/providers/webgpu/tensor/slice.cc b/onnxruntime/core/providers/webgpu/tensor/slice.cc new file mode 100644 index 0000000000000..a201c13de3fbc --- /dev/null +++ b/onnxruntime/core/providers/webgpu/tensor/slice.cc @@ -0,0 +1,278 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +#include "core/common/inlined_containers.h" +#include "core/providers/webgpu/tensor/slice.h" +#include "core/providers/cpu/tensor/utils.h" +#include "core/providers/webgpu/shader_helper.h" +#include "core/providers/webgpu/webgpu_supported_types.h" + +namespace onnxruntime { +namespace webgpu { + +ONNX_OPERATOR_VERSIONED_KERNEL_EX( + Slice, + kOnnxDomain, + 1, 9, + kWebGpuExecutionProvider, + (*KernelDefBuilder::Create()) + .TypeConstraint("T", WebGpuSupportedFloatTypes()), + Slice); + +ONNX_OPERATOR_VERSIONED_KERNEL_EX( + Slice, + kOnnxDomain, + 10, 10, + kWebGpuExecutionProvider, + (*KernelDefBuilder::Create()) + .TypeConstraint("T", WebGpuSupportedFloatTypes()) + .InputMemoryType(OrtMemTypeCPU, 1) + .InputMemoryType(OrtMemTypeCPU, 2) + .InputMemoryType(OrtMemTypeCPU, 3) + .InputMemoryType(OrtMemTypeCPU, 4), + Slice); + +ONNX_OPERATOR_VERSIONED_KERNEL_EX( + Slice, + kOnnxDomain, + 11, 12, + kWebGpuExecutionProvider, + (*KernelDefBuilder::Create()) + .TypeConstraint("T", WebGpuSupportedFloatTypes()) + .InputMemoryType(OrtMemTypeCPU, 1) + .InputMemoryType(OrtMemTypeCPU, 2) + .InputMemoryType(OrtMemTypeCPU, 3) + .InputMemoryType(OrtMemTypeCPU, 4), + Slice); + +ONNX_OPERATOR_KERNEL_EX( + Slice, + kOnnxDomain, + 13, + kWebGpuExecutionProvider, + (*KernelDefBuilder::Create()) + .TypeConstraint("T", WebGpuSupportedFloatTypes()) + .InputMemoryType(OrtMemTypeCPU, 1) + .InputMemoryType(OrtMemTypeCPU, 2) + .InputMemoryType(OrtMemTypeCPU, 3) + .InputMemoryType(OrtMemTypeCPU, 4), + Slice); + +Status SliceProgram::GenerateShaderCode(ShaderHelper& shader) const { + const ShaderVariableHelper& input = shader.AddInput("input", ShaderUsage::UseUniform | ShaderUsage::UseIndicesTypeAlias); + const ShaderVariableHelper& output = shader.AddOutput("output", ShaderUsage::UseUniform | ShaderUsage::UseIndicesTypeAlias); + + shader.MainFunctionBody() << shader.GuardAgainstOutOfBoundsWorkgroupSizes("uniforms.output_size") + << "let output_indices = " << output.OffsetToIndices("global_idx") << ";\n" + << "var input_indices: input_indices_t;\n" + << "var carry = 0u;\n"; + + for (int i = input.Rank() - 1; i >= 0; i--) { + std::string input_shape_i = absl::StrCat("input_shape_", i); + std::string steps_i = absl::StrCat("steps_", i); + std::string starts_i = absl::StrCat("starts_", i); + std::string output_index_i = absl::StrCat("output_index_", i); + std::string input_index_i = absl::StrCat("input_index_", i); + + shader.MainFunctionBody() << "let " << input_shape_i << " = " << input.IndicesGet("uniforms.input_shape", i) << ";\n" + << "let " << steps_i << " = " << input.IndicesGet("uniforms.steps", i) << ";\n" + << "let " << starts_i << " = " << input.IndicesGet("uniforms.starts", i) << ";\n" + << "var " << output_index_i << " = " << output.IndicesGet("output_indices", i) << ";\n" + << "var " << input_index_i << " = " << output_index_i << " * " << steps_i << " + " << starts_i << " + carry;\n" + << "carry = " << input_index_i << " / " << input_shape_i << ";\n" + << input_index_i << " = " << input_index_i << " % " << input_shape_i << ";\n" + << "if (" << input.IndicesGet("uniforms.signs", i) << " < 0) {\n" + << " " << input_index_i << " = " << input_shape_i << " - " << input_index_i << " - 1u + " << starts_i << ";\n" + << "}\n" + << input.IndicesSet("input_indices", i, input_index_i) << ";\n"; + } + + shader.MainFunctionBody() << output.SetByOffset("global_idx", input.GetByIndices("input_indices")); + + return Status::OK(); +} + +Status Slice::ComputeInternal(ComputeContext& context) const { + // READ INPUTS + const 
Tensor* input_tensor = context.Input(0); + const TensorShape& input_shape = input_tensor->Shape(); + int64_t input_rank = static_cast(input_shape.NumDimensions()); + + auto starts_raw = attr_starts_.empty() ? context.Input(1)->DataAsSpan() : gsl::make_span(attr_starts_); + auto ends_raw = attr_ends_.empty() ? context.Input(2)->DataAsSpan() : gsl::make_span(attr_ends_); + + ORT_ENFORCE(starts_raw.size() == ends_raw.size(), "starts and ends must have the same size"); + + int input_count = context.InputCount(); + + const Tensor* axes_tensor = nullptr; + const Tensor* steps_tensor = nullptr; + + if (input_count >= 4) { + // axes provided as input + axes_tensor = context.Input(3); + } + + if (input_count == 5) { + // steps provided as input + steps_tensor = context.Input(4); + } + + // Inject defaults if axes or steps not provided + std::vector axes_default; + if (axes_tensor == nullptr) { + // if axes not provided, set to [0, ..., len(starts)-1] + for (size_t i = 0; i < starts_raw.size(); i++) { + axes_default.push_back(i); + } + } + auto axes_raw = attr_axes_.empty() ? (axes_tensor == nullptr ? gsl::make_span(axes_default) : axes_tensor->DataAsSpan()) : gsl::make_span(attr_axes_); + + std::vector steps_default; + if (steps_tensor == nullptr) { + // if steps not provided, set to [1, ..., 1] of len(starts) + for (size_t i = 0; i < starts_raw.size(); i++) { + steps_default.push_back(1); + } + } + auto steps_raw = steps_tensor == nullptr ? gsl::make_span(steps_default) : steps_tensor->DataAsSpan(); + + // PROCESS INPUTS + std::vector axes; + for (unsigned int i = 0; i < axes_raw.size(); i++) { + int64_t val = axes_raw[i]; + if (val < 0) { + val += input_rank; + } + axes.push_back(static_cast(val)); + } + + std::vector starts; + for (unsigned int i = 0; i < starts_raw.size(); i++) { + int64_t val = starts_raw[i]; + if (val < 0) { + val += input_shape[axes[i]]; + } + + if (steps_raw[i] < 0) { + val = std::max(static_cast(0), std::min(val, static_cast(input_shape[axes[i]] - 1))); + } else { + val = std::max(static_cast(0), std::min(val, static_cast(input_shape[axes[i]]))); + } + starts.push_back(static_cast(val)); + } + + std::vector ends; + for (unsigned int i = 0; i < ends_raw.size(); i++) { + int64_t val = ends_raw[i]; + if (val < 0) { + val += input_shape[axes[i]]; + } + if (steps_raw[i] < 0) { + val = std::max(static_cast(0), std::min(val, static_cast(input_shape[axes[i]] - 1))); + } else { + val = std::max(static_cast(0), std::min(val, static_cast(input_shape[axes[i]]))); + } + ends.push_back(static_cast(val)); + } + + // temporary steps vector to handle negative steps + std::vector steps_tmp; + for (unsigned int i = 0; i < steps_raw.size(); i++) { + if (steps_raw[i] >= std::numeric_limits::max()) { + steps_tmp.push_back(std::numeric_limits::max()); + } else { + steps_tmp.push_back(static_cast(steps_raw[i])); + } + } + + // Insert missing dimensions + if (static_cast(axes.size()) != input_rank) { + for (uint32_t i = 0; i < input_rank; i++) { + int idx = -1; + for (unsigned int j = 0; j < axes_raw.size(); j++) { + if (axes_raw[j] == i) { + idx = j; + break; + } + } + if (idx == -1) { + axes.insert(axes.begin() + i, i); + starts.insert(starts.begin() + i, 0); + ends.insert(ends.begin() + i, static_cast(input_shape[i])); + steps_tmp.insert(steps_tmp.begin() + i, 1); + } + } + } + + // retain the sign of the steps + std::vector signs; + for (unsigned int i = 0; i < steps_tmp.size(); i++) { + signs.push_back(steps_tmp[i] < 0 ? -1 : (steps_tmp[i] > 0 ? 
1 : 0)); + } + + // Convert negative steps to positive steps and reverse starts and ends + for (unsigned int i = 0; i < steps_tmp.size(); i++) { + if (steps_tmp[i] < 0) { + float numSteps = static_cast((static_cast(ends[i]) - static_cast(starts[i])) / static_cast(steps_tmp[i])); + float newEnd = static_cast(starts[i]); + float newStart = newEnd + numSteps * static_cast(steps_tmp[i]); + + starts[i] = static_cast(newStart); + ends[i] = static_cast(newEnd); + steps_tmp[i] = static_cast(-steps_tmp[i]); + } + } + + // final steps vector of type unsigned int + std::vector steps; + for (unsigned int i = 0; i < steps_tmp.size(); i++) { + steps.push_back(static_cast(steps_tmp[i])); + } + + // Reorder inputs in order of axis + std::vector signs_reordered; + std::vector steps_reordered, starts_reordered; + for (unsigned int i = 0; i < axes.size(); i++) { + signs_reordered.push_back(0); + steps_reordered.push_back(0); + starts_reordered.push_back(0); + } + for (unsigned int i = 0; i < axes.size(); i++) { + int32_t dim = axes[i]; + signs_reordered[dim] = signs[i]; + steps_reordered[dim] = steps[i]; + starts_reordered[dim] = starts[i]; + } + + // calculate output dims + std::vector output_dims; + for (unsigned int i = 0; i < axes.size(); i++) { + int32_t dim = axes[i]; + float tmp = ceil((static_cast(ends[dim]) - static_cast(starts[dim])) / static_cast(steps[dim])); + if (tmp < 0) + output_dims.push_back(0); + else + output_dims.push_back(static_cast(tmp)); + } + + TensorShape output_shape(output_dims); + + auto* output_tensor = context.Output(0, output_shape); + uint32_t output_size = static_cast(output_shape.Size()); + + if (output_size == 0) { + return Status::OK(); + } + + SliceProgram program{}; + program + .AddInputs({{input_tensor, ProgramTensorMetadataDependency::TypeAndRank}}) + .AddOutputs({output_tensor}) + .SetDispatchGroupSize((output_size + WORKGROUP_SIZE - 1) / WORKGROUP_SIZE) + .AddUniformVariables({{output_size}, {starts_reordered}, {steps_reordered}, {signs_reordered}}); + return context.RunProgram(program); +} + +} // namespace webgpu +} // namespace onnxruntime \ No newline at end of file diff --git a/onnxruntime/core/providers/webgpu/tensor/slice.h b/onnxruntime/core/providers/webgpu/tensor/slice.h new file mode 100644 index 0000000000000..aac68b80d297e --- /dev/null +++ b/onnxruntime/core/providers/webgpu/tensor/slice.h @@ -0,0 +1,41 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +#pragma once + +#include "core/providers/webgpu/webgpu_kernel.h" +#include "core/providers/webgpu/program.h" + +namespace onnxruntime { +namespace webgpu { + +class SliceProgram final : public Program { + public: + SliceProgram() : Program{"Slice"} {} + + Status GenerateShaderCode(ShaderHelper& sh) const override; + + WEBGPU_PROGRAM_DEFINE_UNIFORM_VARIABLES({"output_size", ProgramUniformVariableDataType::Uint32}, + {"starts", ProgramUniformVariableDataType::Uint32}, + {"steps", ProgramUniformVariableDataType::Uint32}, + {"signs", ProgramUniformVariableDataType::Int32}); +}; + +class Slice final : public WebGpuKernel { + public: + Slice(const OpKernelInfo& info) : WebGpuKernel(info) { + // since only opset1-9 provides these as attributes, we can safely ignore the return value + // we handle failure in fetching the attribute in ComputeInternal + (void)info.GetAttrs("starts", attr_starts_); + (void)info.GetAttrs("ends", attr_ends_); + (void)info.GetAttrs("axes", attr_axes_); + } + + Status ComputeInternal(ComputeContext& context) const override; + + private: + std::vector attr_starts_, attr_ends_, attr_axes_; +}; + +} // namespace webgpu +} // namespace onnxruntime \ No newline at end of file diff --git a/onnxruntime/core/providers/webgpu/tensor/split.cc b/onnxruntime/core/providers/webgpu/tensor/split.cc new file mode 100644 index 0000000000000..83bf832cc5b11 --- /dev/null +++ b/onnxruntime/core/providers/webgpu/tensor/split.cc @@ -0,0 +1,162 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#include "core/providers/webgpu/tensor/split.h" +#include "core/providers/webgpu/shader_helper.h" +#include "core/providers/webgpu/webgpu_supported_types.h" + +namespace onnxruntime { +namespace webgpu { + +namespace { + +// Helper function to calculate the output index based on the input index and the sizes of the splits. +void CalculateOutputIndex(std::ostream& os, size_t output_count) { + os << "fn calculate_output_index(index: u32) -> u32 {\n" + << " for (var i: u32 = 0u; i < " << output_count << "u; i += 1u ) {\n" + << " if (index < " << GetElementAt("uniforms.sizes_in_split_axis", "i", output_count) << ") {\n" + << " return i;\n" + << " }\n" + << " }\n" + << " return " << output_count << "u;\n" + << "}\n"; +} + +// Helper function to write the buffer data for each output. 
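+// As an illustrative sketch (hypothetical three-output case; "<write output_k>" stands in for the store
+// statement produced by ShaderVariableHelper::SetByIndices), the emitted WGSL has the form:
+//   fn write_buffer_data(output_number: u32, global_idx: u32, indices: output_0_indices_t) {
+//     if (output_number == 0u) {
+//       <write output_0>
+//     } else if (output_number == 1u) {
+//       <write output_1>
+//     } else {
+//       <write output_2>
+//     }
+//   }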
+void WriteBufferData(std::ostream& os, const ShaderVariableHelper& input, + gsl::span<const ShaderVariableHelper*> outputs) { + os << "fn write_buffer_data(output_number: u32, global_idx: u32, indices: output_0_indices_t) {\n"; + for (size_t i = 0; i < outputs.size(); ++i) { + const auto buffer_write = outputs[i]->SetByIndices("indices", input.GetByOffset("global_idx")); + if (outputs.size() == 1) { + os << buffer_write; + } else if (i == 0) { + os << " if (output_number == 0u) {\n" + << " " << buffer_write << "\n"; + } else if (i == outputs.size() - 1) { + os << " } else {\n" + << " " << buffer_write << "\n"; + } else { + os << " } else if (output_number == " << i << "u) {\n" + << " " << buffer_write << "\n"; + } + } + // Close the if/else chain only when one was emitted; a single output writes unconditionally. + if (outputs.size() > 1) { + os << " }\n"; + } + os << "}\n"; +} + +} // namespace + +Status SplitProgram::GenerateShaderCode(ShaderHelper& shader) const { + const auto& input = shader.AddInput("input", ShaderUsage::UseUniform | ShaderUsage::UseIndicesTypeAlias); + + size_t output_count = Outputs().size(); + std::vector<const ShaderVariableHelper*> outputs; + outputs.reserve(output_count); + for (size_t i = 0; i < output_count; ++i) { + outputs.push_back( + &shader.AddOutput("output_" + std::to_string(i), ShaderUsage::UseUniform | ShaderUsage::UseIndicesTypeAlias)); + } + + // Add implementation of fn calculate_output_index. + CalculateOutputIndex(shader.AdditionalImplementation(), output_count); + // Add implementation of fn write_buffer_data. + WriteBufferData(shader.AdditionalImplementation(), input, outputs); + + shader.MainFunctionBody() << shader.GuardAgainstOutOfBoundsWorkgroupSizes("uniforms.input_size") + << " var indices = " << input.OffsetToIndices("global_idx") << ";\n" + << " var index = " << input.IndicesGet("indices", axis_) << ";\n" + << " let output_number = calculate_output_index(index);\n" + << " if (output_number != 0u) {\n" + << " index -= uniforms.sizes_in_split_axis[output_number - 1u];\n" + << " " << input.IndicesSet("indices", axis_, "index") << "\n" + << " }\n" + << " write_buffer_data(output_number, global_idx, indices);\n"; + + return Status::OK(); +} + +Status Split::ComputeInternal(ComputeContext& context) const { + const Tensor* input = context.Input(0); + auto& input_shape = input->Shape(); + auto num_outputs = context.OutputCount(); + + int64_t axis = axis_; + std::vector<int64_t> split_sizes; + + split_sizes.assign(split_sizes_.begin(), split_sizes_.end()); + // Compute split_sizes from the 'split' input tensor. + if (split_sizes_.size() == 0 && context.InputCount() > 1) { + const Tensor* split_tensor = context.Input(1); + // Check if split_tensor is valid. + if (split_tensor != nullptr) { + ORT_ENFORCE(split_tensor->Shape().NumDimensions() == 1, "The split tensor must be a vector tensor."); + // Get split_sizes from the input tensor. + auto nDims = static_cast<size_t>(split_tensor->Shape()[0]); + const auto* data = split_tensor->Data<int64_t>(); + split_sizes.assign(data, data + nDims); + } + } + + // The variables below are not actually used in the current implementation. + int before_dims = 0; + int after_dims_including_split_axis = 0; + int after_dims_excluding_split = 0; + // This handles the case where the axis is negative. It also splits outputs evenly according to num_outputs if + split_sizes is empty.
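+ // For example (hypothetical values): input_shape {4, 6}, axis = 1, num_outputs = 3 and an empty
+ // split_sizes come back from PrepareForCompute as axis = 1 and split_sizes {2, 2, 2}, i.e. three
+ // outputs of shape {4, 2}.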
+ ORT_RETURN_IF_ERROR(PrepareForCompute(input_shape, num_outputs, axis, before_dims, after_dims_including_split_axis, + after_dims_excluding_split, split_sizes)); + + SplitProgram program{gsl::narrow_cast(axis)}; + program.AddInput({input, ProgramTensorMetadataDependency::TypeAndRank}); + + auto output_dimensions = input_shape.AsShapeVector(); + for (int i = 0; i < num_outputs; ++i) { + // Update the size of dimension for axis we're splitting on. + auto split_size = narrow(split_sizes[i]); + output_dimensions[narrow(axis)] = split_size; + + Tensor* output = context.Output(i, TensorShape{output_dimensions}); + program.AddOutput({output, ProgramTensorMetadataDependency::Rank}); + } + + uint32_t input_size = gsl::narrow(input_shape.Size()); + // Early return if the input tensor is empty. + if (input_size == 0) { + return Status::OK(); + } + + uint32_t previous_sum = 0; + std::vector sizes_in_split_axis; + // sizes_in_split_axis are the cumulative sizes of the splits in the split axis. + for (auto split_size : split_sizes) { + previous_sum += gsl::narrow(split_size); + sizes_in_split_axis.push_back(previous_sum); + } + + program + .SetDispatchGroupSize((input_size + WORKGROUP_SIZE - 1) / WORKGROUP_SIZE) + .CacheHint(std::to_string(axis)) + .AddUniformVariables( + {input_size, gsl::span(sizes_in_split_axis.data(), sizes_in_split_axis.size())}); + return context.RunProgram(program); +} + +#define WEBGPU_SPLIT_KERNEL(OP_TYPE, VERSION, KERNEL_CLASS, TYPE) \ + ONNX_OPERATOR_KERNEL_EX(OP_TYPE, kOnnxDomain, VERSION, kWebGpuExecutionProvider, \ + KernelDefBuilder().TypeConstraint("T", TYPE).InputMemoryType(OrtMemTypeCPU, 1), \ + KERNEL_CLASS); + +#define WEBGPU_SPLIT_VERSIONED_KERNEL(OP_TYPE, VERSION_FROM, VERSION_TO, KERNEL_CLASS, TYPE) \ + ONNX_OPERATOR_VERSIONED_KERNEL_EX(OP_TYPE, kOnnxDomain, VERSION_FROM, VERSION_TO, kWebGpuExecutionProvider, \ + KernelDefBuilder().TypeConstraint("T", TYPE).InputMemoryType(OrtMemTypeCPU, 1), \ + KERNEL_CLASS); + +WEBGPU_SPLIT_VERSIONED_KERNEL(Split, 1, 1, Split_1, WebGpuSupportedNumberTypes()) +WEBGPU_SPLIT_VERSIONED_KERNEL(Split, 2, 10, Split_2_10, WebGpuSupportedNumberTypes()) +WEBGPU_SPLIT_VERSIONED_KERNEL(Split, 11, 12, Split_11_12, WebGpuSupportedNumberTypes()) +WEBGPU_SPLIT_VERSIONED_KERNEL(Split, 13, 17, Split_13_17, WebGpuSupportedNumberTypes()) +WEBGPU_SPLIT_KERNEL(Split, 18, Split_18, WebGpuSupportedNumberTypes()); + +} // namespace webgpu +} // namespace onnxruntime diff --git a/onnxruntime/core/providers/webgpu/tensor/split.h b/onnxruntime/core/providers/webgpu/tensor/split.h new file mode 100644 index 0000000000000..39ca7a565118a --- /dev/null +++ b/onnxruntime/core/providers/webgpu/tensor/split.h @@ -0,0 +1,61 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +#pragma once + +#include "core/providers/webgpu/program.h" +#include "core/providers/webgpu/webgpu_kernel.h" +#include "core/providers/common.h" +#include "core/providers/cpu/tensor/split.h" + +namespace onnxruntime { +namespace webgpu { + +class SplitProgram final : public Program { + public: + SplitProgram(const uint32_t axis) : Program{"Split"}, axis_{axis} {} + + Status GenerateShaderCode(ShaderHelper& sh) const override; + + WEBGPU_PROGRAM_DEFINE_UNIFORM_VARIABLES({"input_size", ProgramUniformVariableDataType::Uint32}, + {"sizes_in_split_axis", ProgramUniformVariableDataType::Uint32}); + + private: + uint32_t axis_; +}; + +class Split : public WebGpuKernel, public SplitBase { + public: + Split(const OpKernelInfo& info, uint32_t opset) : WebGpuKernel(info), SplitBase(info, opset) {} + + protected: + Status ComputeInternal(ComputeContext& context) const override; +}; + +class Split_1 final : public Split { + public: + Split_1(const OpKernelInfo& info) : Split(info, 1) {} +}; + +class Split_2_10 final : public Split { + public: + Split_2_10(const OpKernelInfo& info) : Split(info, 2) {} +}; + +class Split_11_12 final : public Split { + public: + Split_11_12(const OpKernelInfo& info) : Split(info, 11) {} +}; + +class Split_13_17 final : public Split { + public: + Split_13_17(const OpKernelInfo& info) : Split(info, 13) {} +}; + +class Split_18 final : public Split { + public: + Split_18(const OpKernelInfo& info) : Split(info, 18) {} +}; + +} // namespace webgpu +} // namespace onnxruntime diff --git a/onnxruntime/core/providers/webgpu/webgpu_context.cc b/onnxruntime/core/providers/webgpu/webgpu_context.cc index 7e7ff7dbde80e..99a645878cd7e 100644 --- a/onnxruntime/core/providers/webgpu/webgpu_context.cc +++ b/onnxruntime/core/providers/webgpu/webgpu_context.cc @@ -4,10 +4,20 @@ #include #include +#if defined(__GNUC__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wstrict-aliasing" +#endif + +#if !defined(__wasm__) #include "dawn/dawn_proc.h" #if !defined(USE_EXTERNAL_DAWN) #include "dawn/native/DawnNative.h" #endif +#endif +#if defined(__GNUC__) +#pragma GCC diagnostic pop +#endif #include "core/common/common.h" #include "core/common/path_string.h" @@ -29,7 +39,7 @@ void WebGpuContext::Initialize(const WebGpuBufferCacheConfig& buffer_cache_confi std::call_once(init_flag_, [this, &buffer_cache_config, backend_type]() { // Create wgpu::Adapter if (adapter_ == nullptr) { -#if !defined(__EMSCRIPTEN__) && defined(_MSC_VER) && defined(DAWN_ENABLE_D3D12) && !defined(USE_EXTERNAL_DAWN) +#if !defined(__wasm__) && defined(_MSC_VER) && defined(DAWN_ENABLE_D3D12) && !defined(USE_EXTERNAL_DAWN) // If we are using the D3D12 backend on Windows and the build does not use external Dawn, dxil.dll and dxcompiler.dll are required. // // Dawn will try to load them later, but if they are in the different directory to the executable, it may fail to find them. 
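A note on the #if !defined(__wasm__) guards in this file: Emscripten's WebGPU headers do not expose Dawn's toggle descriptors, so all toggle plumbing is compiled out for WASM builds. A minimal sketch of the chaining idiom the native path relies on (the toggle name below is illustrative, not taken from this PR):

    const char* enabled_toggles[] = {"example_toggle"};  // hypothetical toggle name
    wgpu::DawnTogglesDescriptor toggles{};
    toggles.enabledToggles = enabled_toggles;
    toggles.enabledToggleCount = 1;
    wgpu::RequestAdapterOptions req_adapter_options{};
    // Extension structs are linked, not copied, so 'toggles' must outlive the request.
    req_adapter_options.nextInChain = &toggles;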
@@ -54,15 +64,19 @@ void WebGpuContext::Initialize(const WebGpuBufferCacheConfig& buffer_cache_confi #endif wgpu::RequestAdapterOptions req_adapter_options = {}; - wgpu::DawnTogglesDescriptor adapter_toggles_desc = {}; - req_adapter_options.nextInChain = &adapter_toggles_desc; req_adapter_options.backendType = static_cast(backend_type); req_adapter_options.powerPreference = wgpu::PowerPreference::HighPerformance; +#if !defined(__wasm__) auto enabled_adapter_toggles = GetEnabledAdapterToggles(); + + wgpu::DawnTogglesDescriptor adapter_toggles_desc = {}; adapter_toggles_desc.enabledToggleCount = enabled_adapter_toggles.size(); adapter_toggles_desc.enabledToggles = enabled_adapter_toggles.data(); + req_adapter_options.nextInChain = &adapter_toggles_desc; +#endif + ORT_ENFORCE(wgpu::WaitStatus::Success == instance_.WaitAny(instance_.RequestAdapter( &req_adapter_options, wgpu::CallbackMode::WaitAnyOnly, @@ -78,6 +92,8 @@ void WebGpuContext::Initialize(const WebGpuBufferCacheConfig& buffer_cache_confi // Create wgpu::Device if (device_ == nullptr) { wgpu::DeviceDescriptor device_desc = {}; + +#if !defined(__wasm__) wgpu::DawnTogglesDescriptor device_toggles_desc = {}; device_desc.nextInChain = &device_toggles_desc; @@ -88,6 +104,7 @@ void WebGpuContext::Initialize(const WebGpuBufferCacheConfig& buffer_cache_confi auto disabled_device_toggles = GetDisabledDeviceToggles(); device_toggles_desc.disabledToggleCount = disabled_device_toggles.size(); device_toggles_desc.disabledToggles = disabled_device_toggles.data(); +#endif std::vector required_features = GetAvailableRequiredFeatures(adapter_); if (required_features.size() > 0) { @@ -98,13 +115,12 @@ void WebGpuContext::Initialize(const WebGpuBufferCacheConfig& buffer_cache_confi device_desc.requiredLimits = &required_limits; // TODO: revise temporary error handling - device_desc.SetUncapturedErrorCallback([](const wgpu::Device& /*device*/, wgpu::ErrorType type, const char* message) { - LOGS_DEFAULT(ERROR) << "WebGPU device error(" << int(type) << "): " << message; + device_desc.SetUncapturedErrorCallback([](const wgpu::Device& /*device*/, wgpu::ErrorType type, wgpu::StringView message) { + LOGS_DEFAULT(ERROR) << "WebGPU device error(" << int(type) << "): " << std::string_view{message}; }); // TODO: revise temporary device lost handling - device_desc.SetDeviceLostCallback(wgpu::CallbackMode::AllowSpontaneous, [](const wgpu::Device& /*device*/, wgpu::DeviceLostReason reason, const char* message) { - // cannot use ORT logger because it may be already destroyed - std::cerr << "WebGPU device lost (" << int(reason) << "): " << message; + device_desc.SetDeviceLostCallback(wgpu::CallbackMode::AllowSpontaneous, [](const wgpu::Device& /*device*/, wgpu::DeviceLostReason reason, wgpu::StringView message) { + LOGS_DEFAULT(INFO) << "WebGPU device lost (" << int(reason) << "): " << std::string_view{message}; }); ORT_ENFORCE(wgpu::WaitStatus::Success == instance_.WaitAny(adapter_.RequestDevice( @@ -136,9 +152,12 @@ void WebGpuContext::Initialize(const WebGpuBufferCacheConfig& buffer_cache_confi program_mgr_ = std::make_unique(Device(), DeviceLimits()); // set query type +#if !defined(__wasm__) if (device_.HasFeature(wgpu::FeatureName::ChromiumExperimentalTimestampQueryInsidePasses)) { query_type_ = TimestampQueryType::InsidePasses; - } else if (device_.HasFeature(wgpu::FeatureName::TimestampQuery)) { + } else +#endif + if (device_.HasFeature(wgpu::FeatureName::TimestampQuery)) { query_type_ = TimestampQueryType::AtPasses; } else { query_type_ = 
TimestampQueryType::None; @@ -456,7 +475,9 @@ std::vector WebGpuContext::GetDisabledDeviceToggles() const { std::vector WebGpuContext::GetAvailableRequiredFeatures(const wgpu::Adapter& adapter) const { std::vector required_features; constexpr wgpu::FeatureName features[]{ +#if !defined(__wasm__) wgpu::FeatureName::ChromiumExperimentalTimestampQueryInsidePasses, +#endif wgpu::FeatureName::TimestampQuery, wgpu::FeatureName::ShaderF16, wgpu::FeatureName::Subgroups, @@ -531,7 +552,7 @@ void WebGpuContext::CollectProfilingData(profiling::Events& events) { ORT_ENFORCE(Wait(query_read_buffer.MapAsync(wgpu::MapMode::Read, 0, - query_read_buffer.GetSize(), + static_cast(query_read_buffer.GetSize()), wgpu::CallbackMode::WaitAnyOnly, [](wgpu::MapAsyncStatus status, wgpu::StringView message) { ORT_ENFORCE(status == wgpu::MapAsyncStatus::Success, "Failed to download data from buffer: ", std::string_view{message}); @@ -658,8 +679,14 @@ WebGpuContext& WebGpuContextFactory::CreateContext(const WebGpuContextConfig& co ORT_ENFORCE(instance == nullptr && adapter == nullptr && device == nullptr, "WebGPU EP default context (contextId=0) must not have custom WebGPU instance, adapter or device."); - std::call_once(init_default_flag_, [dawn_proc_table = config.dawn_proc_table]() { - // Step.1 - setup dawn proc table + std::call_once(init_default_flag_, [ +#if !defined(__wasm__) + dawn_proc_table = config.dawn_proc_table +#endif + ]() { + // Step.1 - setup dawn proc table (only for non-WASM build) + +#if !defined(__wasm__) const DawnProcTable* dawn_procs = reinterpret_cast(dawn_proc_table); #if defined(BUILD_DAWN_MONOLITHIC_LIBRARY) ORT_ENFORCE(dawn_procs == nullptr, "setting DawnProcTable is not allowed when dynamically linked to webgpu_dawn."); @@ -672,12 +699,17 @@ WebGpuContext& WebGpuContextFactory::CreateContext(const WebGpuContextConfig& co ORT_ENFORCE(dawn_procs != nullptr, "DawnProcTable must be provided."); #endif dawnProcSetProcs(dawn_procs); +#endif #endif // Step.2 - Create wgpu::Instance +#if !defined(__wasm__) wgpu::InstanceDescriptor instance_desc{}; instance_desc.features.timedWaitAnyEnable = true; default_instance_ = wgpu::CreateInstance(&instance_desc); +#else + default_instance_ = wgpu::CreateInstance(nullptr); +#endif ORT_ENFORCE(default_instance_ != nullptr, "Failed to create wgpu::Instance."); }); diff --git a/onnxruntime/core/providers/webgpu/webgpu_context.h b/onnxruntime/core/providers/webgpu/webgpu_context.h index 4724118a29b0a..80c8c64ce72b6 100644 --- a/onnxruntime/core/providers/webgpu/webgpu_context.h +++ b/onnxruntime/core/providers/webgpu/webgpu_context.h @@ -3,10 +3,6 @@ #pragma once -#ifdef __EMSCRIPTEN__ -#include -#endif - #include #include diff --git a/onnxruntime/core/providers/webgpu/webgpu_execution_provider.cc b/onnxruntime/core/providers/webgpu/webgpu_execution_provider.cc index 76a55b7ce4f2e..dec7e48786bf5 100644 --- a/onnxruntime/core/providers/webgpu/webgpu_execution_provider.cc +++ b/onnxruntime/core/providers/webgpu/webgpu_execution_provider.cc @@ -3,9 +3,6 @@ #include "core/providers/webgpu/webgpu_execution_provider.h" -#ifdef __EMSCRIPTEN__ -#include -#endif #include #include #include @@ -637,11 +634,12 @@ std::unique_ptr RegisterKernels() { BuildKernelCreateInfo, BuildKernelCreateInfo, - // BuildKernelCreateInfo, - // BuildKernelCreateInfo, - // BuildKernelCreateInfo, - // BuildKernelCreateInfo, - // BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + 
BuildKernelCreateInfo, BuildKernelCreateInfo, @@ -663,10 +661,10 @@ std::unique_ptr RegisterKernels() { // BuildKernelCreateInfo, // BuildKernelCreateInfo, - // BuildKernelCreateInfo, - // BuildKernelCreateInfo, - // BuildKernelCreateInfo, - // BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, diff --git a/onnxruntime/core/providers/webnn/builders/helper.cc b/onnxruntime/core/providers/webnn/builders/helper.cc index 45a87960126cd..e5124a90df394 100644 --- a/onnxruntime/core/providers/webnn/builders/helper.cc +++ b/onnxruntime/core/providers/webnn/builders/helper.cc @@ -99,44 +99,30 @@ bool IsTensorShapeSupported(const NodeArg& node_arg, const std::string& parent_n return true; } -std::vector> GetSupportedNodes(const GraphViewer& graph_viewer, - const emscripten::val& wnn_builder, - const WebnnDeviceType device_type, - const emscripten::val& wnn_limits, - const logging::Logger& logger) { - std::vector> supported_node_groups; - std::vector supported_node_group; - const auto& node_indices = graph_viewer.GetNodesInTopologicalOrder(); - - for (size_t i = 0; i < node_indices.size(); i++) { - auto node_idx = node_indices[i]; - const auto* node(graph_viewer.GetNode(node_idx)); +std::unordered_set GetSupportedNodes(const GraphViewer& graph_viewer, + const emscripten::val& wnn_builder, + const WebnnDeviceType device_type, + const emscripten::val& wnn_limits, + const logging::Logger& logger) { + std::unordered_set supported_nodes; + + for (const auto& node : graph_viewer.Nodes()) { bool supported = false; // Firstly check if platform supports the WebNN op. - if (CheckSingleOp(node->OpType(), wnn_builder, device_type)) { - supported = IsNodeSupported(*node, graph_viewer, device_type, wnn_limits, logger); + if (CheckSingleOp(node.OpType(), wnn_builder, device_type)) { + supported = IsNodeSupported(node, graph_viewer, device_type, wnn_limits, logger); } - - LOGS(logger, VERBOSE) << "Operator type: [" << node->OpType() - << "] index: [" << node_idx - << "] name: [" << node->Name() + LOGS(logger, VERBOSE) << "Operator type: [" << node.OpType() + << "] index: [" << node.Index() + << "] name: [" << node.Name() << "] supported: [" << supported << "]"; if (supported) { - supported_node_group.push_back(node_idx); - } else { - if (!supported_node_group.empty()) { - supported_node_groups.push_back(supported_node_group); - supported_node_group.clear(); - } + supported_nodes.insert(&node); } } - if (!supported_node_group.empty()) { - supported_node_groups.push_back(supported_node_group); - } - - return supported_node_groups; + return supported_nodes; } bool AreInputDataTypesSame(const std::string& op_type, diff --git a/onnxruntime/core/providers/webnn/builders/helper.h b/onnxruntime/core/providers/webnn/builders/helper.h index cf80eeef3418b..27607ddb4d8a2 100644 --- a/onnxruntime/core/providers/webnn/builders/helper.h +++ b/onnxruntime/core/providers/webnn/builders/helper.h @@ -188,12 +188,14 @@ inline bool TensorExists(const ConstPointerContainer>& def bool IsTensorShapeSupported(const NodeArg& node_arg, const std::string& parent_name, const logging::Logger& logger, bool allow_empty_input = false); -// Get a list of groups of supported nodes, each group represents a subgraph supported by WebNN EP. 
-std::vector> GetSupportedNodes(const GraphViewer& graph_viewer, - const emscripten::val& wnn_builder, - const WebnnDeviceType device_type, - const emscripten::val& wnn_limits, - const logging::Logger& logger); +// Get a set of nodes supported by WebNN EP. +std::unordered_set GetSupportedNodes(const GraphViewer& graph_viewer, + const emscripten::val& wnn_builder, + const WebnnDeviceType device_type, + const emscripten::val& wnn_limits, + const logging::Logger& logger); +// TODO(@Honry): Some ONNX ops are supported by decomposed WebNN ops, +// we need to check the support of the decomposed ops. static const InlinedHashMap op_map = { {"Abs", "abs"}, {"Add", "add"}, @@ -273,6 +275,7 @@ static const InlinedHashMap op_map = { {"Relu", "relu"}, {"Reshape", "reshape"}, {"Resize", "resample2d"}, + {"RotaryEmbedding", "gather"}, {"ScatterElements", "scatterElements"}, {"ScatterND", "scatterND"}, {"Shape", "slice"}, diff --git a/onnxruntime/core/providers/webnn/builders/impl/activation_op_builder.cc b/onnxruntime/core/providers/webnn/builders/impl/activation_op_builder.cc index 781ddcb896155..585fddfd1ff2c 100644 --- a/onnxruntime/core/providers/webnn/builders/impl/activation_op_builder.cc +++ b/onnxruntime/core/providers/webnn/builders/impl/activation_op_builder.cc @@ -17,10 +17,6 @@ class ActivationOpBuilder : public BaseOpBuilder { private: Status AddToModelBuilderImpl(ModelBuilder& model_builder, const Node& node, const logging::Logger& logger) const override ORT_MUST_USE_RESULT; - - // Operator support related. - bool IsOpSupportedImpl(const InitializedTensorSet& initializers, const Node& node, - WebnnDeviceType device_type, const logging::Logger& logger) const override; }; // Add operator related. @@ -68,30 +64,6 @@ Status ActivationOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, return Status::OK(); } -// Operator support related. -bool ActivationOpBuilder::IsOpSupportedImpl(const InitializedTensorSet& /* initializers */, - const Node& node, - WebnnDeviceType device_type, - const logging::Logger& logger) const { - const auto& input_defs = node.InputDefs(); - const auto& op_type = node.OpType(); - - std::vector input_shape; - if (!GetShape(*input_defs[0], input_shape, logger)) - return false; - - if (op_type == "Elu" && device_type == WebnnDeviceType::CPU) { - NodeAttrHelper helper(node); - float alpha = helper.Get("alpha", 1.0f); - if (alpha != 1.0f) { - LOGS(logger, VERBOSE) << "WebNN CPU backend only supports Elu's alpha == 1.0"; - return false; - } - } - - return true; -} - void CreateActivationOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations) { if (op_registrations.op_builder_map.count(op_type) > 0) return; diff --git a/onnxruntime/core/providers/webnn/builders/impl/binary_op_builder.cc b/onnxruntime/core/providers/webnn/builders/impl/binary_op_builder.cc index e14507e8f5aea..c5493f97fdb21 100644 --- a/onnxruntime/core/providers/webnn/builders/impl/binary_op_builder.cc +++ b/onnxruntime/core/providers/webnn/builders/impl/binary_op_builder.cc @@ -20,8 +20,6 @@ class BinaryOpBuilder : public BaseOpBuilder { const logging::Logger& logger) const override ORT_MUST_USE_RESULT; // Operator support related. 
- bool IsOpSupportedImpl(const InitializedTensorSet& initializers, const Node& node, - const WebnnDeviceType device_type, const logging::Logger& logger) const override; bool HasSupportedInputsImpl(const InitializedTensorSet& /* initializers */, const Node& node, const emscripten::val& wnn_limits, const logging::Logger& logger) const override; }; @@ -59,33 +57,6 @@ Status BinaryOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const return Status::OK(); } -bool BinaryOpBuilder::IsOpSupportedImpl(const InitializedTensorSet& initializers, - const Node& node, - const WebnnDeviceType device_type, - const logging::Logger& logger) const { - const auto& input_defs = node.InputDefs(); - const auto& op_type = node.OpType(); - - std::vector input0_shape; - std::vector input1_shape; - if (!GetShape(*input_defs[0], input0_shape, logger) || - !GetShape(*input_defs[1], input1_shape, logger)) { - return false; - } - - // 'prelu' op in WebNN CPU backend restricts the last dimension of input and slope to be same. - // TODO: Remove this workaround once the associated issue is resolved in Chromium: - // https://issues.chromium.org/issues/335517470. - if (op_type == "PRelu" && device_type == WebnnDeviceType::CPU) { - if (input0_shape.back() != input1_shape.back()) { - LOGS(logger, VERBOSE) << "The last dimension of input and slope for PRelu must be same for WebNN CPU backend."; - return false; - } - } - - return true; -} - bool BinaryOpBuilder::HasSupportedInputsImpl(const InitializedTensorSet& /* initializers */, const Node& node, const emscripten::val& wnn_limits, const logging::Logger& logger) const { const auto& input_defs = node.InputDefs(); diff --git a/onnxruntime/core/providers/webnn/builders/impl/clip_op_builder.cc b/onnxruntime/core/providers/webnn/builders/impl/clip_op_builder.cc index 374143c886849..a244efdd9b2eb 100644 --- a/onnxruntime/core/providers/webnn/builders/impl/clip_op_builder.cc +++ b/onnxruntime/core/providers/webnn/builders/impl/clip_op_builder.cc @@ -69,27 +69,7 @@ bool ClipOpBuilder::IsOpSupportedImpl(const InitializedTensorSet& initializers, // can ensure initializers are constant. See #19401 for details of how this update was made to the NNAPI EP. // GetClipMinMax(graph_viewer, node, minValue, maxValue, logger) float min, max; - if (GetClipMinMax(initializers, node, min, max, logger)) { - // WebNN CPU backend only supports 3 specific ranges: [0.0, infinity], [-1.0, 1.0], [0.0, 6.0]. - // TODO: Remove this workaround once the associated issue is resolved in Chromium: - // https://issues.chromium.org/issues/326156496. 
- if (device_type == WebnnDeviceType::CPU) { - if ((min == 0.0f && max == std::numeric_limits::infinity()) || - (min == -1.0f && max == 1.0f) || - (min == 0.0f && max == 6.0f)) { - return true; - } else { - LOGS(logger, VERBOSE) << "Clip min and max values (" - << min << ", " - << max << ") are not supported for WebNN CPU backend"; - return false; - } - } - - return true; - } else { - return false; - }; + return GetClipMinMax(initializers, node, min, max, logger); } void CreateClipOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations) { diff --git a/onnxruntime/core/providers/webnn/builders/impl/conv_op_builder.cc b/onnxruntime/core/providers/webnn/builders/impl/conv_op_builder.cc index 548e718b8774e..e623590e3bc1a 100644 --- a/onnxruntime/core/providers/webnn/builders/impl/conv_op_builder.cc +++ b/onnxruntime/core/providers/webnn/builders/impl/conv_op_builder.cc @@ -378,22 +378,6 @@ bool ConvOpBuilder::IsOpSupportedImpl(const InitializedTensorSet& initializers, return false; } - // WebNN CPU backend (TFLite) only supports default dilations and group. - // https://source.chromium.org/chromium/chromium/src/+/main:services/webnn/tflite/graph_builder_tflite.cc;l=1040 - if (device_type == WebnnDeviceType::CPU && op_type == "ConvTranspose") { - NodeAttrHelper helper(node); - const auto dilations = helper.Get("dilations", std::vector{1, 1}); - const auto group = helper.Get("group", 1); - if (dilations[0] != 1 || (dilations.size() > 1 && dilations[1] != 1)) { - LOGS(logger, VERBOSE) << op_type << " for WebNN CPU backend only supports default dilation 1."; - return false; - } - if (group != 1) { - LOGS(logger, VERBOSE) << op_type << " for WebNN CPU backend only supports default group 1."; - return false; - } - } - return true; } diff --git a/onnxruntime/core/providers/webnn/builders/impl/rotaryEmbedding_op_builder.cc b/onnxruntime/core/providers/webnn/builders/impl/rotaryEmbedding_op_builder.cc new file mode 100644 index 0000000000000..f6b7a7089fef5 --- /dev/null +++ b/onnxruntime/core/providers/webnn/builders/impl/rotaryEmbedding_op_builder.cc @@ -0,0 +1,314 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Copyright (c) Intel Corporation. All rights reserved. +// Licensed under the MIT License. + +#include "core/providers/common.h" +#include "core/providers/shared/utils/utils.h" +#include "core/providers/webnn/builders/helper.h" +#include "core/providers/webnn/builders/model_builder.h" +#include "core/providers/webnn/builders/op_builder_factory.h" + +#include "base_op_builder.h" + +// WebNN doesn't provide a dedicated op for RotaryEmbedding. Instead, we implement it by using a +// combination of WebNN ops. The decomposed graph is referenced from DML EP at: +// onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/DmlOperatorRotaryEmbedding.cpp +/* + Input CosCache PositionIds SinCache + | | | | + | | +--------+-----------+ | + Split | | | | + | | Gather Gather + +-------+ | | | + | | | | + | Identity----------+ | | + | | | | | + | | | | | + | --Split-- | | | + | \ / | +-----------------+ | + | \ / | | | + | \ / Mul | + | \ / | | + | X | | + | / \ | | + | / \ | | + | Join | | + | | | | + | | +---------------------------------------------------------+ + | | | | + | Mul | + | | | + | +-----+ +------+ + | | | + | Add + | | + +-------------+ | + | | + Join +*/ +namespace onnxruntime { +namespace webnn { + +class RotaryEmbeddingOpBuilder : public BaseOpBuilder { + // Add operator related. 
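+ // (Each intermediate operand created in AddToModelBuilderImpl below is given a "label" option derived
+ // from the node name, e.g. node_name + "_reshape_input", so failures in the emitted WebNN graph can be
+ // traced back to this decomposition.)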
+ private: + Status AddToModelBuilderImpl(ModelBuilder& model_builder, const Node& node, + const logging::Logger& logger) const override ORT_MUST_USE_RESULT; + + // Operator support related. + private: + bool IsOpSupportedImpl(const InitializedTensorSet& initializers, const Node& node, + const WebnnDeviceType /* device_type */, const logging::Logger& logger) const override; +}; + +Status RotaryEmbeddingOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const Node& node, + const logging::Logger& logger) const { + const auto& input_defs = node.InputDefs(); + int32_t input_data_type; + ORT_RETURN_IF_NOT(GetType(*input_defs[0], input_data_type, logger), "Cannot get input type"); + std::vector input_shape; + std::vector position_ids_shape; + std::vector cos_cache_shape; + ORT_RETURN_IF_NOT(GetShape(*input_defs[0], input_shape, logger), "Cannot get input shape"); + ORT_RETURN_IF_NOT(GetShape(*input_defs[1], position_ids_shape, logger), "Cannot get position_ids shape"); + ORT_RETURN_IF_NOT(GetShape(*input_defs[2], cos_cache_shape, logger), "Cannot get cos_cache shape"); + const bool input_is_4d = input_shape.size() == 4; + // When position_ids is a 1D tensor, it represents the start offset for each sequence. + const bool position_ids_is_offset = position_ids_shape.size() == 1; + + emscripten::val input = model_builder.GetOperand(input_defs[0]->Name()); + emscripten::val position_ids = model_builder.GetOperand(input_defs[1]->Name()); + emscripten::val cos_cache = model_builder.GetOperand(input_defs[2]->Name()); + emscripten::val sin_cache = model_builder.GetOperand(input_defs[3]->Name()); + + const auto node_name = node.Name(); + emscripten::val wnn_builder = model_builder.GetBuilder(); + + NodeAttrHelper helper(node); + const bool interleaved = gsl::narrow_cast(helper.Get("interleaved", 0)); + uint32_t num_heads = helper.Get("num_heads", 0); + uint32_t rotary_embedding_dim = helper.Get("rotary_embedding_dim", 0); + + // The input is either with 3D tensor shape (batch_size, sequence_length, hidden_size) or + // 4D tensor shape (batch_size, num_heads, sequence_length, head_size) + const uint32_t batch_size = static_cast(input_shape[0]); + const uint32_t sequence_length = input_is_4d ? static_cast(input_shape[2]) + : static_cast(input_shape[1]); + const uint32_t hidden_size = input_is_4d ? static_cast(input_shape[1] * input_shape[3]) + : static_cast(input_shape[2]); + const uint32_t head_size = num_heads == 0 ? static_cast(cos_cache_shape[1]) * 2 + : hidden_size / num_heads; + if (num_heads == 0) { + num_heads = hidden_size / head_size; + } + if (rotary_embedding_dim == 0) { + rotary_embedding_dim = head_size; + } + + // First ensure the input has shape (batch_size, num_heads, sequence_length, head_size). + if (!input_is_4d) { + const std::vector new_shape{batch_size, num_heads, sequence_length, head_size}; + emscripten::val reshape_input_options = emscripten::val::object(); + reshape_input_options.set("label", node_name + "_reshape_input"); + input = wnn_builder.call( + "reshape", input, emscripten::val::array(new_shape), reshape_input_options); + } + + // Split the input to perform the rotary embedding only on a subregion of the tensor if needed. + // The split inputs will be joined back together at the end. 
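+ // For example (hypothetical sizes): with head_size = 64 and rotary_embedding_dim = 32, the last axis is
+ // split into a rotated slice of size 32 (partial_input0) and a pass-through slice of size 32 (partial_input1).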
+ emscripten::val partial_input0 = input; + emscripten::val partial_input1 = emscripten::val::undefined(); + if (head_size != rotary_embedding_dim) { + const std::vector splits{rotary_embedding_dim, head_size - rotary_embedding_dim}; + emscripten::val split_input_options = emscripten::val::object(); + split_input_options.set("label", node_name + "_split_input"); + split_input_options.set("axis", 3); + emscripten::val split = wnn_builder.call( + "split", input, emscripten::val::array(splits), split_input_options); + partial_input0 = split[0]; + partial_input1 = split[1]; + } + + // Split the partial input0 data into 2 equal parts. + // Firstly reshape the partial input0. + const std::vector new_partial_input0_shape = + interleaved ? std::vector({batch_size, sequence_length, num_heads, rotary_embedding_dim / 2, 2}) + : std::vector({batch_size, sequence_length, num_heads, 2, rotary_embedding_dim / 2}); + emscripten::val reshape_partial_input0_options = emscripten::val::object(); + reshape_partial_input0_options.set("label", node_name + "_reshape_partial_input0"); + partial_input0 = wnn_builder.call( + "reshape", partial_input0, emscripten::val::array(new_partial_input0_shape), reshape_partial_input0_options); + // Split partial input0. + const int split_axis = interleaved ? 4 : 3; + emscripten::val split_partial_input0_options = emscripten::val::object(); + split_partial_input0_options.set("label", node_name + "_split_partial_input0"); + split_partial_input0_options.set("axis", split_axis); + emscripten::val split_partial_input0 = wnn_builder.call( + "split", partial_input0, 2, split_partial_input0_options); + + // Swap the two halves and join them together. + emscripten::val concat_partial_input0_options = emscripten::val::object(); + concat_partial_input0_options.set("label", node_name + "_concat_partial_input0"); + emscripten::val concated_partial_input0 = wnn_builder.call( + "concat", split_partial_input0.call("reverse"), split_axis, concat_partial_input0_options); + + if (position_ids_is_offset) { + // We generate a sequence from 0 to sequence_length and add the offset to it. + const std::vector position_ids_range_shape = {1, sequence_length}; + emscripten::val position_ids_range_buffer = emscripten::val::global("BigInt64Array").new_(sequence_length); + for (uint32_t i = 0; i < sequence_length; i++) { + position_ids_range_buffer.set(i, emscripten::val::global("BigInt")(i)); + } + emscripten::val position_ids_range_desc = emscripten::val::object(); + position_ids_range_desc.set("shape", emscripten::val::array(position_ids_range_shape)); + position_ids_range_desc.set("dimensions", emscripten::val::array(position_ids_range_shape)); + position_ids_range_desc.set("dataType", emscripten::val("int64")); + emscripten::val position_ids_range = wnn_builder.call( + "constant", position_ids_range_desc, position_ids_range_buffer); + // Add the offset to the sequence. + emscripten::val position_ids_add_range_options = emscripten::val::object(); + position_ids_add_range_options.set("label", node_name + "_position_ids_add_range"); + position_ids = wnn_builder.call( + "add", position_ids, position_ids_range, position_ids_add_range_options); + } + + // Gather the cosine/sine values based on the position_ids. 
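+ // Here cos_cache/sin_cache are 2D with shape (max_sequence_length, rotary_embedding_dim / 2), so gathering
+ // along axis 0 with position_ids of shape (batch_size, sequence_length) produces values of shape
+ // (batch_size, sequence_length, rotary_embedding_dim / 2).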
+ emscripten::val gather_cos_sin_options = emscripten::val::object(); + gather_cos_sin_options.set("label", node_name + "_gather_cos_sin"); + gather_cos_sin_options.set("axis", 0); + emscripten::val gather_cos = wnn_builder.call<emscripten::val>( + "gather", cos_cache, position_ids, gather_cos_sin_options); + emscripten::val gather_sin = wnn_builder.call<emscripten::val>( + "gather", sin_cache, position_ids, gather_cos_sin_options); + + // After gathering cosine/sine, reshape and broadcast them to match the number of heads of the input data. + const std::vector<uint32_t> reshaped_cos_sin_shape = + interleaved ? std::vector<uint32_t>({batch_size, sequence_length, 1, rotary_embedding_dim / 2, 1}) + : std::vector<uint32_t>({batch_size, sequence_length, 1, 1, rotary_embedding_dim / 2}); + emscripten::val reshape_gather_cos_sin_options = emscripten::val::object(); + reshape_gather_cos_sin_options.set("label", node_name + "_reshape_gather_cos_sin"); + gather_cos = wnn_builder.call<emscripten::val>( + "reshape", gather_cos, emscripten::val::array(reshaped_cos_sin_shape), reshape_gather_cos_sin_options); + gather_sin = wnn_builder.call<emscripten::val>( + "reshape", gather_sin, emscripten::val::array(reshaped_cos_sin_shape), reshape_gather_cos_sin_options); + + // Multiply the non-rotated data with the cosine and the rotated data with the sine. + emscripten::val mul_cos_options = emscripten::val::object(); + mul_cos_options.set("label", node_name + "_mul_cos"); + emscripten::val mul_cos = wnn_builder.call<emscripten::val>( + "mul", partial_input0, gather_cos, mul_cos_options); + emscripten::val mul_sin_options = emscripten::val::object(); + mul_sin_options.set("label", node_name + "_mul_sin"); + emscripten::val mul_sin = wnn_builder.call<emscripten::val>( + "mul", concated_partial_input0, gather_sin, mul_sin_options); + + // Create a vector that contains the sign values {-1, 1}. + emscripten::val sign_buffer = emscripten::val::undefined(); + const std::vector<uint32_t> sign_shape = interleaved ? std::vector<uint32_t>({1, 1, 1, 2}) + : std::vector<uint32_t>({1, 1, 2, 1}); + emscripten::val sign_constant_desc = emscripten::val::object(); + sign_constant_desc.set("shape", emscripten::val::array(sign_shape)); + sign_constant_desc.set("dimensions", emscripten::val::array(sign_shape)); + ORT_RETURN_IF_NOT(SetWebnnDataType(sign_constant_desc, input_data_type), "Unsupported data type"); + if (input_data_type == ONNX_NAMESPACE::TensorProto_DataType_FLOAT) { + sign_buffer = emscripten::val::global("Float32Array").new_(2); + sign_buffer.set(0, -1.0f); + sign_buffer.set(1, 1.0f); + } else if (input_data_type == ONNX_NAMESPACE::TensorProto_DataType_FLOAT16) { + sign_buffer = emscripten::val::global("Uint16Array").new_(2); + sign_buffer.set(0, PackFloat32ToUint16AsFloat16(-1.0f)); + sign_buffer.set(1, PackFloat32ToUint16AsFloat16(1.0f)); + } else { + return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "Unsupported input data type: ", input_data_type); + } + emscripten::val sign_constant = wnn_builder.call<emscripten::val>("constant", sign_constant_desc, sign_buffer); + + // Multiply the broadcasted sign values with the rotated input. + emscripten::val mul_sign_options = emscripten::val::object(); + mul_sign_options.set("label", node_name + "_mul_sign"); + mul_sin = wnn_builder.call<emscripten::val>("mul", mul_sin, sign_constant, mul_sign_options); + + // Reshape mul_cos and mul_sin to (batch_size, sequence_length, num_heads, rotary_embedding_dim).
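+ // Both products still have the 5D split layout at this point; collapsing the trailing two dimensions back into rotary_embedding_dim lets them be added elementwise.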
+ const std::vector<uint32_t> reshaped_mul_cos_sin_shape = + {batch_size, sequence_length, num_heads, rotary_embedding_dim}; + emscripten::val reshape_mul_cos_sin_options = emscripten::val::object(); + reshape_mul_cos_sin_options.set("label", node_name + "_reshape_mul_cos_sin"); + mul_cos = wnn_builder.call<emscripten::val>( + "reshape", mul_cos, emscripten::val::array(reshaped_mul_cos_sin_shape), reshape_mul_cos_sin_options); + mul_sin = wnn_builder.call<emscripten::val>( + "reshape", mul_sin, emscripten::val::array(reshaped_mul_cos_sin_shape), reshape_mul_cos_sin_options); + + // Add the multiplied cos and sin values together. + emscripten::val add_mul_cos_sin_options = emscripten::val::object(); + add_mul_cos_sin_options.set("label", node_name + "_add_mul_cos_sin"); + emscripten::val output = wnn_builder.call<emscripten::val>( + "add", mul_cos, mul_sin, add_mul_cos_sin_options); + + // Join the added values with the rest of the input. + if (head_size != rotary_embedding_dim) { + emscripten::val concat_back_input_options = emscripten::val::object(); + concat_back_input_options.set("label", node_name + "_concat_back_input"); + emscripten::val concat_inputs = emscripten::val::array(); + concat_inputs.call<void>("push", output); + concat_inputs.call<void>("push", partial_input1); + output = wnn_builder.call<emscripten::val>("concat", concat_inputs, 3, concat_back_input_options); + } + + // Reshape the output to the original shape. The output shape is the same as the input shape. + const std::vector<uint32_t> output_shape = GetVecUint32FromVecInt64(input_shape); + emscripten::val reshape_output_options = emscripten::val::object(); + reshape_output_options.set("label", node_name + "_reshape_output"); + output = wnn_builder.call<emscripten::val>( + "reshape", output, emscripten::val::array(output_shape), reshape_output_options); + + model_builder.AddOperand(node.OutputDefs()[0]->Name(), std::move(output)); + return Status::OK(); +} + +// Operator support related. +bool RotaryEmbeddingOpBuilder::IsOpSupportedImpl(const InitializedTensorSet& initializers, const Node& node, + const WebnnDeviceType /* device_type */, + const logging::Logger& logger) const { + const auto& input_defs = node.InputDefs(); + std::vector<int64_t> input_shape; + std::vector<int64_t> cos_cache_shape; + if (!GetShape(*input_defs[0], input_shape, logger)) return false; + if (!GetShape(*input_defs[2], cos_cache_shape, logger)) return false; + const auto input_size = input_shape.size(); + if (input_size != 3 && input_size != 4) { + LOGS(logger, VERBOSE) << "RotaryEmbedding only supports 3D or 4D input shape, input is " << input_size << "D shape"; + return false; + } + + NodeAttrHelper helper(node); + const int is_packed_batching = helper.Get("is_packed_batching", 0); + const int num_heads = helper.Get("num_heads", 0); + const int rotary_embedding_dim = helper.Get("rotary_embedding_dim", 0); + + const auto sequence_length = input_size == 4 ?
input_shape[2] : input_shape[1]; + if (is_packed_batching == 0 && sequence_length > cos_cache_shape[0]) { + LOGS(logger, VERBOSE) << "RotaryEmbedding: updating cos_cache and sin_cache is not currently supported"; + return false; + } + + if (input_size == 4 && num_heads != 0 && num_heads != input_shape[1]) { + LOGS(logger, VERBOSE) << "RotaryEmbedding: when input has 4 dimensions, num_heads must be 0 or have the same value " + "as the second dimension of the input"; + return false; + } + + if (rotary_embedding_dim > 0 && num_heads == 0) { + LOGS(logger, VERBOSE) << "RotaryEmbedding: num_heads must be provided if rotary_embedding_dim is specified"; + return false; + } + + return true; +} + +void CreateRotaryEmbeddingOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations) { + op_registrations.builders.push_back(std::make_unique<RotaryEmbeddingOpBuilder>()); + op_registrations.op_builder_map.emplace(op_type, op_registrations.builders.back().get()); +} + +} // namespace webnn +} // namespace onnxruntime diff --git a/onnxruntime/core/providers/webnn/builders/model_builder.cc b/onnxruntime/core/providers/webnn/builders/model_builder.cc index e8f116d390199..097dd16307b88 100644 --- a/onnxruntime/core/providers/webnn/builders/model_builder.cc +++ b/onnxruntime/core/providers/webnn/builders/model_builder.cc @@ -75,9 +75,18 @@ InitializedTensorSet ModelBuilder::GetInitializerTensors() { } void ModelBuilder::PreprocessInitializers() { + const auto& initializers = graph_viewer_.GetAllInitializedTensors(); const auto& node_indices = graph_viewer_.GetNodesInTopologicalOrder(); for (size_t i = 0; i < node_indices.size(); i++) { const auto* node(graph_viewer_.GetNode(node_indices[i])); + + // find all initializers consumed. AddInitializersToSkip will potentially decrement the usage count. + for (const auto* input : node->InputDefs()) { + if (input->Exists() && Contains(initializers, input->Name())) { + initializer_usage_[input->Name()]++; + } + } + if (const auto* op_builder = GetOpBuilder(*node)) { op_builder->AddInitializersToSkip(*this, *node); } @@ -90,12 +99,11 @@ Status ModelBuilder::RegisterInitializers() { const auto& name = tensor.name(); const auto& shape = tensor.dims(); - // Ignore the following tensors: - // 1. Empty tensors: optional tensors can be indicated by an empty name. - // 2. Tensors in skipped_initializers_: These are tensors that are not used as WebNN Constants. - // Note: Scalar tensors are excluded because ONNX Runtime will optimize same scalar initializers into one. - if (name.empty() || (Contains(skipped_initializers_, name) && !shape.empty())) + // skip initializer if there is no remaining usage + auto usage_count = initializer_usage_[name]; + if (usage_count == 0) { continue; + } std::vector<int32_t> dims; // When the shape is empty, it is scalar initializer that dims = {}; @@ -385,7 +393,13 @@ void ModelBuilder::AddOperand(const std::string& name, const emscripten::val& op } void ModelBuilder::AddInitializerToSkip(const std::string& tensor_name) { - skipped_initializers_.insert(tensor_name); + // Decrement usage count if this is a known initializer. + // For simplicity the OpBuilder::AddInitializersToSkip implementations may call this for arbitrary input names + // without first checking if the value is an initializer.
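+ // A usage count that drops to zero means every consumer reads the initializer only as op-builder metadata (e.g. shape, axes or indices data), so RegisterInitializers will not create a WebNN constant for it.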
+ auto entry = initializer_usage_.find(tensor_name); + if (entry != initializer_usage_.end()) { + --entry->second; + } } void ModelBuilder::AddInputToSkip(const std::string& input_name) { diff --git a/onnxruntime/core/providers/webnn/builders/model_builder.h b/onnxruntime/core/providers/webnn/builders/model_builder.h index 0fc2fa20670c7..4e2d84f481df0 100644 --- a/onnxruntime/core/providers/webnn/builders/model_builder.h +++ b/onnxruntime/core/providers/webnn/builders/model_builder.h @@ -81,7 +81,7 @@ class ModelBuilder { InlinedHashMap<std::string, OnnxTensorInfo> input_output_info_; - InlinedHashSet<std::string> skipped_initializers_; + std::unordered_map<std::string, size_t> initializer_usage_; InlinedHashSet<std::string> skipped_inputs_; uint32_t name_token_{0}; diff --git a/onnxruntime/core/providers/webnn/builders/op_builder_factory.cc b/onnxruntime/core/providers/webnn/builders/op_builder_factory.cc index e0ca50a36dbf9..ee21a33091078 100644 --- a/onnxruntime/core/providers/webnn/builders/op_builder_factory.cc +++ b/onnxruntime/core/providers/webnn/builders/op_builder_factory.cc @@ -196,6 +196,10 @@ static OpBuilderRegistrations CreateOpBuilderRegistrations() { CreateResizeOpBuilder("Resize", op_registrations); } + { // RotaryEmbedding + CreateRotaryEmbeddingOpBuilder("RotaryEmbedding", op_registrations); + } + { // ScatterElements CreateScatterElementsOpBuilder("ScatterElements", op_registrations); } diff --git a/onnxruntime/core/providers/webnn/builders/op_builder_factory.h b/onnxruntime/core/providers/webnn/builders/op_builder_factory.h index 22bd6cd0cfa9f..1c4a7b32f842b 100644 --- a/onnxruntime/core/providers/webnn/builders/op_builder_factory.h +++ b/onnxruntime/core/providers/webnn/builders/op_builder_factory.h @@ -48,6 +48,7 @@ void CreateQDQOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_r void CreateReductionOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations); void CreateReshapeOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations); void CreateResizeOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations); +void CreateRotaryEmbeddingOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations); void CreateScatterElementsOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations); void CreateScatterNDOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations); void CreateShapeOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations); diff --git a/onnxruntime/core/providers/webnn/webnn_execution_provider.cc b/onnxruntime/core/providers/webnn/webnn_execution_provider.cc index 1a337e185b497..00fbb26b731f8 100644 --- a/onnxruntime/core/providers/webnn/webnn_execution_provider.cc +++ b/onnxruntime/core/providers/webnn/webnn_execution_provider.cc @@ -13,6 +13,9 @@ #include "core/common/safeint.h" #include "core/providers/webnn/allocator.h" #include "core/providers/webnn/data_transfer.h" +#include "core/providers/partitioning_utils.h" +#include "core/optimizer/qdq_transformer/selectors_actions/qdq_selectors.h" +#include "core/optimizer/qdq_transformer/selectors_actions/shared/utils.h" #include "builders/model.h" #include "builders/helper.h" @@ -20,6 +23,8 @@ namespace onnxruntime { +constexpr const char* WEBNN = "WEBNN"; + WebNNExecutionProvider::WebNNExecutionProvider(const std::string& webnn_device_flags) : IExecutionProvider{ onnxruntime::kWebNNExecutionProvider, @@ -51,8 +56,6 @@ WebNNExecutionProvider::~WebNNExecutionProvider() {} std::vector<std::unique_ptr<ComputeCapability>>
WebNNExecutionProvider::GetCapability(const onnxruntime::GraphViewer& graph_viewer, const IKernelLookup& /*kernel_registries*/) const { - std::vector<std::unique_ptr<ComputeCapability>> result; - // For subgraph which is the attribute of the control flow nodes, part of its initializers are stored in its // ancestor graphs as common initializers shared for other subgraphs. We need to collect all of them used for // identifying the required initializer names and storing into 'meta_def->constant_initializers'. @@ -64,23 +67,6 @@ WebNNExecutionProvider::GetCapability(const onnxruntime::GraphViewer& graph_view all_initializers = webnn::CollectAllInitializedTensors(graph_viewer); } - /* - Very basic search for groups of nodes that can be handled by the EP. - This doesn't work perfectly if you have a scenario like the following where A and D could be handled by the EP - but B is between them in the topological sort as you'll get two single node capabilities. However if can also - be advantageous if C and E could be handled by the EP as they would be combined with D even though not connected. - Not sure how often each of these scenarios happens. - - A B C - | / | - D E - | | - - Would probably be better to walk the edges for each node the EP can handle as they are iterated in topological order, - accumulating nodes (and saving which ones have been taken) until you run out. This would guarantee all - connected nodes that can be handled are grouped together. - */ - const auto& logger = *GetLogger(); emscripten::val wnn_builder = emscripten::val::global("MLGraphBuilder").new_(wnn_context_); @@ -88,43 +74,37 @@ WebNNExecutionProvider::GetCapability(const onnxruntime::GraphViewer& graph_view ORT_THROW("Failed to create WebNN builder."); } - const auto node_groups = webnn::GetSupportedNodes(graph_viewer, wnn_builder, wnn_device_type_, wnn_limits_, logger); - wnn_builder = emscripten::val::undefined(); + // Get all the NodeUnits in the graph_viewer + std::vector<std::unique_ptr<NodeUnit>> node_unit_holder; + std::unordered_map<const Node*, const NodeUnit*> node_unit_map; - if (node_groups.empty()) { - return result; - } + std::tie(node_unit_holder, node_unit_map) = QDQ::GetAllNodeUnits(graph_viewer, logger); - const auto& graph_output_list = graph_viewer.GetOutputs(); - InlinedHashSet<const NodeArg*> graph_outputs(graph_output_list.cbegin(), graph_output_list.cend()); + const auto supported_nodes = webnn::GetSupportedNodes(graph_viewer, wnn_builder, wnn_device_type_, wnn_limits_, logger); - size_t num_of_supported_nodes = 0; - for (const auto& group : node_groups) { - if (group.empty()) - continue; + const auto gen_metadef_name = [&]() { + HashValue model_hash; + int metadef_id = metadef_id_generator_.GenerateId(graph_viewer, model_hash); + return MakeString(WEBNN, "_", model_hash, "_", metadef_id); + }; - num_of_supported_nodes += group.size(); - LOGS(logger, VERBOSE) << "WebNNExecutionProvider::GetCapability, current supported node group size: " - << group.size(); + auto result = utils::CreateSupportedPartitions(graph_viewer, supported_nodes, {}, + gen_metadef_name, WEBNN, kWebNNExecutionProvider, + &node_unit_map, /*drop_constant_initializers*/ true); - InlinedHashSet<NodeIndex> node_set; - node_set.reserve(group.size()); - for (const auto& index : group) { - node_set.insert(index); - } + // Release wnn_builder + wnn_builder = emscripten::val::undefined(); - std::unique_ptr<IndexedSubGraph> sub_graph = std::make_unique<IndexedSubGraph>(); + const auto& graph_output_list = graph_viewer.GetOutputs(); + InlinedHashSet<const NodeArg*> graph_outputs(graph_output_list.cbegin(), graph_output_list.cend()); + + for (auto& capability : result) { + auto& sub_graph =
capability->sub_graph; + if (sub_graph->nodes.empty()) + continue; std::vector<std::string> subgraph_initializers; - InlinedHashSet<const NodeArg*> node_outputs; - InlinedHashSet<const NodeArg*> subgraph_inputs; - InlinedHashSet<const NodeArg*> subgraph_outputs; - std::vector<const NodeArg*> ordered_subgraph_inputs; - // Output should be unique. It may be produced as graph output and subgraph output. - InlinedHashSet<const NodeArg*> ordered_subgraph_outputs; - - for (const auto& index : group) { - sub_graph->nodes.push_back(index); + for (const auto& index : sub_graph->nodes) { const auto* node = graph_viewer.GetNode(index); for (const auto* input : node->InputDefs()) { @@ -136,39 +116,13 @@ WebNNExecutionProvider::GetCapability(const onnxruntime::GraphViewer& graph_view if (is_subgraph && Contains(all_initializers, input->Name())) { subgraph_initializers.push_back(input->Name()); } - // If the node input was not produced by this subgraph, add it to the subgraph inputs. - if (node_outputs.count(input) == 0) { - if (subgraph_inputs.count(input) == 0) { - subgraph_inputs.insert(input); - ordered_subgraph_inputs.push_back(input); - } - } - } - - const auto& output_defs = node->OutputDefs(); - for (const auto* output_def : output_defs) { - node_outputs.insert(output_def); - // if output is overall graph output we need to produce it. - if (graph_outputs.count(output_def) != 0) { - ordered_subgraph_outputs.insert(output_def); - } - } - - // if output connects to a node not in this subgraph we need to produce it. - for (auto it = node->OutputEdgesBegin(), end = node->OutputEdgesEnd(); it != end; ++it) { - if (node_set.count(it->GetNode().Index()) == 0) { - const auto* output_def = output_defs[it->GetSrcArgIndex()]; - if (subgraph_outputs.count(output_def) == 0) { - subgraph_outputs.insert(output_def); - ordered_subgraph_outputs.insert(output_def); - } - } } } // Assign inputs and outputs to subgraph's meta_def. uint64_t model_hash; int metadef_id = metadef_id_generator_.GenerateId(graph_viewer, model_hash); + const auto meta_def_old = sub_graph->GetMetaDef(); auto meta_def = std::make_unique<::onnxruntime::IndexedSubGraph::MetaDef>(); meta_def->name = "WEBNN_" + std::to_string(model_hash) + "_" + std::to_string(metadef_id); meta_def->domain = kMSDomain; @@ -181,20 +135,24 @@ WebNNExecutionProvider::GetCapability(const onnxruntime::GraphViewer& graph_view } } - for (const auto& input : ordered_subgraph_inputs) { - meta_def->inputs.push_back(input->Name()); + for (const auto& input : meta_def_old->inputs) { + meta_def->inputs.push_back(input); } - for (const auto& output : ordered_subgraph_outputs) { - meta_def->outputs.push_back(output->Name()); + for (const auto& output : meta_def_old->outputs) { + meta_def->outputs.push_back(output); } sub_graph->SetMetaDef(std::move(meta_def)); - - result.push_back(std::make_unique<ComputeCapability>(std::move(sub_graph))); } - auto num_of_partitions = result.size(); + const auto num_of_partitions = result.size(); + const auto num_of_supported_nodes = std::accumulate( + result.begin(), result.end(), size_t{0}, + [](const auto& acc, const auto& partition) -> size_t { + return acc + (partition && partition->sub_graph ?
partition->sub_graph->nodes.size() : 0); + }); + const auto summary_msg = MakeString( "WebNNExecutionProvider::GetCapability,", " number of partitions supported by WebNN: ", num_of_partitions, diff --git a/onnxruntime/core/session/IOBinding.h b/onnxruntime/core/session/IOBinding.h index 1f1b3b8073f96..d5a1e273369a1 100644 --- a/onnxruntime/core/session/IOBinding.h +++ b/onnxruntime/core/session/IOBinding.h @@ -51,7 +51,7 @@ class IOBinding { /** * If the BindInput calls are async this function acts as a barrier to ensure all inputs are fully copied - * before you call the Run() method. There is no point calling Run() if you're inputs are not ready at the + * before you call the Run() method. There is no point calling Run() if your inputs are not ready at the * desired location. * This is a blocking call and is a wrapper over IExecutionProvider::Sync(). * Call InferenceSession::Run() only after calling this method or else you'll end up wasting cycles inside Run(). diff --git a/onnxruntime/core/session/allocator_adapters.cc b/onnxruntime/core/session/allocator_adapters.cc index ac5ea75453558..bebf6e98ff3fa 100644 --- a/onnxruntime/core/session/allocator_adapters.cc +++ b/onnxruntime/core/session/allocator_adapters.cc @@ -2,12 +2,17 @@ // Licensed under the MIT License. #include "allocator_adapters.h" +#include "core/framework/error_code_helper.h" #include "core/session/inference_session.h" #include "core/session/ort_env.h" #include "core/session/ort_apis.h" -#include "core/framework/error_code_helper.h" namespace onnxruntime { + +namespace { +constexpr uint32_t kOrtAllocatorReserveMinVersion = 18; +} // namespace + OrtAllocatorImplWrappingIAllocator::OrtAllocatorImplWrappingIAllocator(onnxruntime::AllocatorPtr&& i_allocator) : i_allocator_(std::move(i_allocator)) { OrtAllocator::version = ORT_API_VERSION; @@ -17,7 +22,7 @@ OrtAllocatorImplWrappingIAllocator::OrtAllocatorImplWrappingIAllocator(onnxrunti [](OrtAllocator* this_, void* p) { static_cast<OrtAllocatorImplWrappingIAllocator*>(this_)->Free(p); }; OrtAllocator::Info = [](const OrtAllocator* this_) { return static_cast<const OrtAllocatorImplWrappingIAllocator*>(this_)->Info(); }; - if (OrtAllocator::version >= 18) { + if (OrtAllocator::version >= kOrtAllocatorReserveMinVersion) { OrtAllocator::Reserve = [](OrtAllocator* this_, size_t size) { return static_cast<OrtAllocatorImplWrappingIAllocator*>(this_)->Reserve(size); }; } @@ -51,7 +56,7 @@ void* IAllocatorImplWrappingOrtAllocator::Alloc(size_t size) { } void* IAllocatorImplWrappingOrtAllocator::Reserve(size_t size) { - if (ort_allocator_->version >= 18 && ort_allocator_->Reserve) { + if (ort_allocator_->version >= kOrtAllocatorReserveMinVersion && ort_allocator_->Reserve) { return ort_allocator_->Reserve(ort_allocator_, size); } diff --git a/onnxruntime/core/session/inference_session.cc b/onnxruntime/core/session/inference_session.cc index 223eed248800e..26ffeb93ab3b6 100644 --- a/onnxruntime/core/session/inference_session.cc +++ b/onnxruntime/core/session/inference_session.cc @@ -921,7 +921,7 @@ common::Status InferenceSession::SaveToOrtFormat(const std::filesystem::path& fi ORT_RETURN_IF_ERROR(kernel_type_str_resolver.RegisterGraphNodeOpSchemas(model_->MainGraph())); ORT_RETURN_IF_ERROR(standalone::RegisterCustomOpNodeSchemas(kernel_type_str_resolver, model_->MainGraph())); - for (const auto op_schema : saved_runtime_optimization_produced_node_op_schemas_) { + for (const auto& op_schema : saved_runtime_optimization_produced_node_op_schemas_) { ORT_RETURN_IF_ERROR(kernel_type_str_resolver.RegisterOpSchema(*op_schema)); } diff --git a/onnxruntime/core/session/provider_bridge_ort.cc
b/onnxruntime/core/session/provider_bridge_ort.cc index af39edae2074d..3a694ac6f8e5e 100644 --- a/onnxruntime/core/session/provider_bridge_ort.cc +++ b/onnxruntime/core/session/provider_bridge_ort.cc @@ -37,7 +37,6 @@ #include "core/framework/model_metadef_id_generator.h" #include "core/optimizer/qdq_transformer/selectors_actions/qdq_selectors.h" #include "core/optimizer/qdq_transformer/selectors_actions/shared/utils.h" -#include "core/session/onnxruntime_session_options_config_keys.h" #include "core/session/onnxruntime_c_api.h" #include "core/common/string_helper.h" @@ -62,6 +61,10 @@ #include "orttraining/core/framework/distributed_run_context.h" #endif +#ifdef _WIN32 +#include "core/platform/windows/logging/etw_sink.h" +#endif + namespace ONNX_NAMESPACE { // We use these names in the provider API because we don't have the protobuf definitions of the RepeatedField* types using int64s = google::protobuf::RepeatedField<int64_t>; @@ -76,11 +79,18 @@ using FunctionProtos = google::protobuf::RepeatedPtrField<FunctionProto>; namespace onnxruntime { using IndexedSubGraph_MetaDef = IndexedSubGraph::MetaDef; using IndexedSubGraph_SourceOfSchema = IndexedSubGraph::SourceOfSchema; +using Node_EdgeEnd = Node::EdgeEnd; +#ifdef _WIN32 +namespace logging { +using EtwRegistrationManager_EtwInternalCallback = EtwRegistrationManager::EtwInternalCallback; +} +#endif } // namespace onnxruntime #include "core/common/cpuid_info.h" #include "core/common/logging/logging.h" #include "core/providers/shared_library/provider_interfaces.h" +#include "core/providers/partitioning_utils.h" #include "core/providers/cuda/cuda_provider_factory_creator.h" #include "core/providers/cann/cann_provider_factory_creator.h" @@ -90,6 +100,7 @@ using IndexedSubGraph_SourceOfSchema = IndexedSubGraph::SourceOfSchema; #include "core/providers/openvino/openvino_provider_factory_creator.h" #include "core/providers/tensorrt/tensorrt_provider_factory_creator.h" #include "core/providers/vitisai/vitisai_provider_factory_creator.h" +#include "core/providers/qnn/qnn_provider_factory_creator.h" #include "core/providers/cuda/cuda_provider_factory.h" #include "core/providers/cann/cann_provider_factory.h" @@ -181,6 +192,7 @@ struct Node__EdgeIterator_Impl : Node__EdgeIterator { bool operator!=(const Node__EdgeIterator& p) const override { return v_ != static_cast<const Node__EdgeIterator_Impl*>(&p)->v_; } void operator++() override { v_.operator++(); } + const Node_EdgeEnd& operator*() const override { return v_.operator*(); } const Node& GetNode() const override { return v_->GetNode(); } int GetSrcArgIndex() const override { return v_->GetSrcArgIndex(); } int GetDstArgIndex() const override { return v_->GetDstArgIndex(); } @@ -188,6 +200,18 @@ struct Node__EdgeIterator_Impl : Node__EdgeIterator { Node::EdgeConstIterator v_; }; +struct ConstGraphNodes_Iterator_Impl : ConstGraphNodes_Iterator { + ConstGraphNodes_Iterator_Impl(ConstGraphNodes::ConstNodeIterator&& v) : v_{std::move(v)} {} + + bool operator!=(const ConstGraphNodes_Iterator& other) const override { + return v_ != static_cast<const ConstGraphNodes_Iterator_Impl*>(&other)->v_; + } + void operator++() override { v_.operator++(); } + const Node& operator*() override { return *v_; } + + ConstGraphNodes::ConstNodeIterator v_; +}; + #if !defined(ORT_MINIMAL_BUILD) || defined(ORT_MINIMAL_BUILD_CUSTOM_OPS) common::Status LoadDynamicLibraryFromProvider(onnxruntime::PathString library_name) { const auto& platform_env = onnxruntime::Env::Default(); @@ -234,10 +258,8 @@ struct ProviderHostImpl : ProviderHost { void* CPUAllocator__Alloc(CPUAllocator* p, size_t size) override { return
p->CPUAllocator::Alloc(size); } void CPUAllocator__Free(CPUAllocator* p, void* allocation) override { return p->CPUAllocator::Free(allocation); } -#ifdef USE_CUDA std::unique_ptr<IAllocator> CreateCUDAAllocator(int16_t device_id, const char* name) override { return GetProviderInfo_CUDA().CreateCUDAAllocator(device_id, name); } std::unique_ptr<IAllocator> CreateCUDAPinnedAllocator(const char* name) override { return GetProviderInfo_CUDA().CreateCUDAPinnedAllocator(name); } - std::unique_ptr<IDataTransfer> CreateGPUDataTransfer() override { return GetProviderInfo_CUDA().CreateGPUDataTransfer(); } void cuda__Impl_Cast(void* stream, const int64_t* input_data, int32_t* output_data, size_t count) override { return GetProviderInfo_CUDA().cuda__Impl_Cast(stream, input_data, output_data, count); } void cuda__Impl_Cast(void* stream, const int32_t* input_data, int64_t* output_data, size_t count) override { return GetProviderInfo_CUDA().cuda__Impl_Cast(stream, input_data, output_data, count); } @@ -247,7 +269,6 @@ struct ProviderHostImpl : ProviderHost { Status CudaCall_false(int retCode, const char* exprString, const char* libName, int successCode, const char* msg, const char* file, const int line) override { return GetProviderInfo_CUDA().CudaCall_false(retCode, exprString, libName, successCode, msg, file, line); } void CudaCall_true(int retCode, const char* exprString, const char* libName, int successCode, const char* msg, const char* file, const int line) override { GetProviderInfo_CUDA().CudaCall_true(retCode, exprString, libName, successCode, msg, file, line); } -#endif #ifdef USE_MIGRAPHX std::unique_ptr<IAllocator> CreateMIGraphXAllocator(int16_t device_id, const char* name) override { return GetProviderInfo_MIGraphX().CreateMIGraphXAllocator(device_id, name); } @@ -267,6 +288,8 @@ struct ProviderHostImpl : ProviderHost { Status RocmCall_false(int retCode, const char* exprString, const char* libName, int successCode, const char* msg, const char* file, const int line) override { return GetProviderInfo_ROCM().RocmCall_false(retCode, exprString, libName, successCode, msg, file, line); } void RocmCall_true(int retCode, const char* exprString, const char* libName, int successCode, const char* msg, const char* file, const int line) override { GetProviderInfo_ROCM().RocmCall_true(retCode, exprString, libName, successCode, msg, file, line); } +#else + std::unique_ptr<IDataTransfer> CreateGPUDataTransfer() override { return GetProviderInfo_CUDA().CreateGPUDataTransfer(); } #endif std::string GetEnvironmentVar(const std::string& var_name) override { return Env::Default().GetEnvironmentVar(var_name); } @@ -367,22 +390,58 @@ struct ProviderHostImpl : ProviderHost { // logging::Logger (wrapped) bool logging__Logger__OutputIsEnabled(const logging::Logger* p, logging::Severity severity, logging::DataType data_type) override { return p->OutputIsEnabled(severity, data_type); } + logging::Severity logging__Logger__GetSeverity(const logging::Logger* p) override { + return p->GetSeverity(); + } // logging::LoggingManager (wrapped) const logging::Logger& logging__LoggingManager__DefaultLogger() override { return logging::LoggingManager::DefaultLogger(); } + bool logging__LoggingManager__HasDefaultLogger() override { return logging::LoggingManager::HasDefaultLogger(); } // logging::Capture (wrapped) - std::unique_ptr<logging::Capture> logging__Capture__construct(const logging::Logger& logger, logging::Severity severity, const char* category, logging::DataType dataType, const CodeLocation& location) override { - return std::make_unique<logging::Capture>(logger, severity, category, dataType, location); +
std::unique_ptr<logging::Capture> logging__Capture__construct(const logging::Logger& logger, + logging::Severity severity, const char* category, + logging::DataType data_type, + const CodeLocation& location) override { + return std::make_unique<logging::Capture>(logger, severity, category, data_type, location); } void logging__Capture__operator_delete(logging::Capture* p) noexcept override { delete p; } std::ostream& logging__Capture__Stream(logging::Capture* p) noexcept override { return p->Stream(); } + void logging__Capture__ProcessPrintf(logging::Capture* p, const char* format, va_list args) override { + p->ProcessPrintf(format, args); + } + +#if defined(_WIN32) + // logging::EtwRegistrationManager + logging::EtwRegistrationManager& logging__EtwRegistrationManager__Instance() override { + return logging::EtwRegistrationManager::Instance(); + } + bool logging__EtwRegistrationManager__SupportsETW() override { + return logging::EtwRegistrationManager::SupportsETW(); + } + logging::Severity logging__EtwRegistrationManager__MapLevelToSeverity(logging::EtwRegistrationManager* p) override { + return p->MapLevelToSeverity(); + } + void logging__EtwRegistrationManager__RegisterInternalCallback( + logging::EtwRegistrationManager* p, + const logging::EtwRegistrationManager_EtwInternalCallback& callback) override { + p->RegisterInternalCallback(callback); + } + void logging__EtwRegistrationManager__UnregisterInternalCallback( + logging::EtwRegistrationManager* p, + const logging::EtwRegistrationManager_EtwInternalCallback& callback) override { + p->UnregisterInternalCallback(callback); + } +#endif // defined(_WIN32) // Env Env& Env__Default() override { return Env::Default(); } // Utils::DataTypeUtils (wrapped) const std::string* Utils__DataTypeUtils__ToType(const ONNX_NAMESPACE::TypeProto& type_proto) override { return ONNX_NAMESPACE::Utils::DataTypeUtils::ToType(type_proto); } + const std::string* Utils__DataTypeUtils__ToType(const std::string& type_str) override { + return ONNX_NAMESPACE::Utils::DataTypeUtils::ToType(type_str); + } // int64s (wrapped) int int64s__size(const ONNX_NAMESPACE::int64s* p) override { return p->size(); } @@ -424,6 +483,7 @@ struct ProviderHostImpl : ProviderHost { bool TypeProto_Tensor__has_shape(const ONNX_NAMESPACE::TypeProto_Tensor* p) override { return p->has_shape(); } const ONNX_NAMESPACE::TensorShapeProto& TypeProto_Tensor__shape(const ONNX_NAMESPACE::TypeProto_Tensor* p) override { return p->shape(); } ONNX_NAMESPACE::TensorShapeProto* TypeProto_Tensor__mutable_shape(ONNX_NAMESPACE::TypeProto_Tensor* p) override { return p->mutable_shape(); } + bool TypeProto_Tensor__has_elem_type(const ONNX_NAMESPACE::TypeProto_Tensor* p) override { return p->has_elem_type(); } int32_t TypeProto_Tensor__elem_type(const ONNX_NAMESPACE::TypeProto_Tensor* p) override { return p->elem_type(); } void TypeProto_Tensor__set_elem_type(ONNX_NAMESPACE::TypeProto_Tensor* p, int32_t value) override { p->set_elem_type(value); }; @@ -444,6 +504,7 @@ struct ProviderHostImpl : ProviderHost { // TypeProto (wrapped) std::unique_ptr<ONNX_NAMESPACE::TypeProto> TypeProto__construct() override { return std::make_unique<ONNX_NAMESPACE::TypeProto>(); } void TypeProto__CopyFrom(ONNX_NAMESPACE::TypeProto* p, const ONNX_NAMESPACE::TypeProto* other) override { p->CopyFrom(*other); } + bool TypeProto__has_tensor_type(const ONNX_NAMESPACE::TypeProto* p) override { return p->has_tensor_type(); } const ONNX_NAMESPACE::TypeProto_Tensor& TypeProto__tensor_type(const ONNX_NAMESPACE::TypeProto* p) override { return p->tensor_type(); } ONNX_NAMESPACE::TypeProto_Tensor*
TypeProto__mutable_tensor_type(ONNX_NAMESPACE::TypeProto* p) override { return p->mutable_tensor_type(); } int TypeProto__value_case(const ONNX_NAMESPACE::TypeProto* p) override { return p->value_case(); } @@ -572,6 +633,7 @@ struct ProviderHostImpl : ProviderHost { const std::string& TensorProto__raw_data(const ONNX_NAMESPACE::TensorProto* p) override { return p->raw_data(); } std::string* TensorProto__mutable_raw_data(ONNX_NAMESPACE::TensorProto* p) override { return p->mutable_raw_data(); } + bool TensorProto__has_data_type(const ONNX_NAMESPACE::TensorProto* p) override { return p->has_data_type(); } int32_t TensorProto__data_type(const ONNX_NAMESPACE::TensorProto* p) override { return p->data_type(); } void TensorProto__set_data_type(ONNX_NAMESPACE::TensorProto* p, int32_t type) override { p->set_data_type(type); } @@ -610,6 +672,10 @@ struct ProviderHostImpl : ProviderHost { return std::make_unique<TensorShapeProto_Dimension_Iterator_Impl>(p->end()); } + size_t TensorShapeProto_Dimensions__size(const ONNX_NAMESPACE::TensorShapeProto_Dimensions* p) override { + return p->size(); + } + // TensorShapeProto (wrapped) int TensorShapeProto__dim_size(const ONNX_NAMESPACE::TensorShapeProto* p) override { return p->dim_size(); } const ONNX_NAMESPACE::TensorShapeProto_Dimensions& TensorShapeProto__dim(const ONNX_NAMESPACE::TensorShapeProto* p) override { return p->dim(); } @@ -960,6 +1026,12 @@ struct ProviderHostImpl : ProviderHost { void Node__AddAttribute(Node* p, const ::std::string& attr_name, const ONNX_NAMESPACE::GraphProto& value) override { p->AddAttribute(attr_name, value); } + void Node__AddAttribute(Node* p, const ::std::string& attr_name, const std::string& value) override { + p->AddAttribute(attr_name, value); + } + void Node__AddAttribute(Node* p, const ::std::string& attr_name, int64_t value) override { + p->AddAttribute(attr_name, value); + } size_t Node__GetInputEdgesCount(const Node* p) noexcept override { return p->GetInputEdgesCount(); } size_t Node__GetOutputEdgesCount(const Node* p) noexcept override { return p->GetOutputEdgesCount(); } @@ -982,6 +1054,11 @@ struct ProviderHostImpl : ProviderHost { std::unordered_map<std::string, gsl::not_null<const Graph*>> Node__GetAttributeNameToSubgraphMap(const Node* p) const override { return p->GetAttributeNameToSubgraphMap(); } int Node__NodeType(const Node* p) const noexcept override { return int(p->NodeType()); } + // Node_EdgeEnd (wrapped). Maps to Node::EdgeEnd struct.
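+ // These accessors let shared-provider code consume the Node_EdgeEnd returned by the Node__EdgeIterator::operator* added above, mirroring Node::EdgeEnd's GetNode/GetSrcArgIndex/GetDstArgIndex.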
+ const Node& Node_EdgeEnd__GetNode(const Node_EdgeEnd* p) override { return p->GetNode(); } + int Node_EdgeEnd__GetSrcArgIndex(const Node_EdgeEnd* p) override { return p->GetSrcArgIndex(); } + int Node_EdgeEnd__GetDstArgIndex(const Node_EdgeEnd* p) override { return p->GetDstArgIndex(); } + // NodeArg (wrapped) const std::string& NodeArg__Name(const NodeArg* p) noexcept override { return p->Name(); } const ONNX_NAMESPACE::TensorShapeProto* NodeArg__Shape(const NodeArg* p) override { return p->Shape(); } @@ -1016,7 +1093,8 @@ struct ProviderHostImpl : ProviderHost { void NodeAttributes__insert_or_assign(NodeAttributes* p, const std::string& k, const ONNX_NAMESPACE::AttributeProto& v) override { p->insert_or_assign(k, v); } void NodeAttributes__reserve(NodeAttributes* p, size_t size) override { p->reserve(size); } - // NodeUnit (wrapped) + void NodeUnit__operator_delete(NodeUnit* p) noexcept override { delete p; } + int NodeUnit__UnitType(const NodeUnit* p) noexcept override { return static_cast<int>(p->UnitType()); } const std::vector<NodeUnitIODef>& NodeUnit__Inputs(const NodeUnit* p) noexcept override { @@ -1064,12 +1142,46 @@ struct ProviderHostImpl : ProviderHost { return QDQ::GetAllNodeUnits(*graph_viewer, logger); } + // Partitioning utils + std::vector<std::unique_ptr<ComputeCapability>> + Utils__CreateSupportedPartitions(const GraphViewer& graph_viewer, + const std::unordered_set<const Node*>& supported_nodes, + const std::unordered_set<std::string>& stop_ops, + const utils::GenerateMetadefNameFn& generate_metadef_name, + const std::string& execution_provider_name, + const std::string& execution_provider_type, + const std::unordered_map<const Node*, const NodeUnit*>* node_unit_map, + bool drop_constant_initializers) override { + return onnxruntime::utils::CreateSupportedPartitions(graph_viewer, + supported_nodes, + stop_ops, + generate_metadef_name, + execution_provider_name, + execution_provider_type, + node_unit_map, + drop_constant_initializers); + } + + std::unique_ptr<ComputeCapability> + Utils__MakeComputeCapability(const GraphViewer& graph_viewer, + const std::vector<const Node*>& group, + const std::function<std::string()>& generate_metadef_name, + const std::string& execution_provider_name, + bool drop_constant_initializers) override { + return onnxruntime::utils::MakeComputeCapability(graph_viewer, group, generate_metadef_name, + execution_provider_name, drop_constant_initializers); + } + // Model (wrapped) std::unique_ptr<Model> Model__construct(ONNX_NAMESPACE::ModelProto&& model_proto, const PathString& model_path, const IOnnxRuntimeOpSchemaRegistryList* local_registries, const logging::Logger& logger) override { return std::make_unique<Model>(model_proto, model_path, local_registries, logger); } + std::unique_ptr<Model> Model__construct(const std::string& graph_name, bool is_onnx_domain_only, + const logging::Logger& logger) override { + return std::make_unique<Model>(graph_name, is_onnx_domain_only, logger); + } void Model__operator_delete(Model* p) override { delete p; } Graph& Model__MainGraph(Model* p) override { return p->MainGraph(); } std::unique_ptr<ONNX_NAMESPACE::ModelProto> Model__ToProto(Model* p) override { return std::make_unique<ONNX_NAMESPACE::ModelProto>(p->ToProto()); } @@ -1179,6 +1291,7 @@ struct ProviderHostImpl : ProviderHost { const std::string& GraphViewer__Name(const GraphViewer* p) noexcept override { return p->Name(); } const std::filesystem::path& GraphViewer__ModelPath(const GraphViewer* p) noexcept override { return p->ModelPath(); } + const ConstGraphNodes& GraphViewer__Nodes(const GraphViewer* p) noexcept override { return p->Nodes(); } const Node* GraphViewer__GetNode(const GraphViewer* p, NodeIndex node_index) override { return p->GetNode(node_index); } const NodeArg*
GraphViewer__GetNodeArg(const GraphViewer* p, const std::string& name) override { return p->GetNodeArg(name); } @@ -1196,6 +1309,9 @@ struct ProviderHostImpl : ProviderHost { const std::vector<const NodeArg*>& GraphViewer__GetInputs(const GraphViewer* p) noexcept override { return p->GetInputs(); } const std::vector<const NodeArg*>& GraphViewer__GetOutputs(const GraphViewer* p) noexcept override { return p->GetOutputs(); } + bool GraphViewer__NodeProducesGraphOutput(const GraphViewer* p, const Node& node) override { + return p->NodeProducesGraphOutput(node); + } const std::unordered_set<const NodeArg*>& GraphViewer__GetValueInfo(const GraphViewer* p) noexcept override { return p->GetValueInfo(); } const InitializedTensorSet& GraphViewer__GetAllInitializedTensors(const GraphViewer* p) override { return p->GetAllInitializedTensors(); } @@ -1224,6 +1340,21 @@ struct ProviderHostImpl : ProviderHost { const Node* GraphViewer__GetProducerNode(const GraphViewer* p, const std::string& node_arg_name) const override { return p->GetProducerNode(node_arg_name); } IOnnxRuntimeOpSchemaCollectionPtr GraphViewer__GetSchemaRegistry(const GraphViewer* p) const override { return p->GetSchemaRegistry(); } + // ConstGraphNodes + std::unique_ptr<ConstGraphNodes_Iterator> ConstGraphNodes__begin(const ConstGraphNodes* p) override { + return std::make_unique<ConstGraphNodes_Iterator_Impl>(p->begin()); + } + std::unique_ptr<ConstGraphNodes_Iterator> ConstGraphNodes__end(const ConstGraphNodes* p) override { + return std::make_unique<ConstGraphNodes_Iterator_Impl>(p->end()); + } + std::unique_ptr<ConstGraphNodes_Iterator> ConstGraphNodes__cbegin(const ConstGraphNodes* p) override { + return std::make_unique<ConstGraphNodes_Iterator_Impl>(p->cbegin()); + } + std::unique_ptr<ConstGraphNodes_Iterator> ConstGraphNodes__cend(const ConstGraphNodes* p) override { + return std::make_unique<ConstGraphNodes_Iterator_Impl>(p->cend()); + } + bool ConstGraphNodes__empty(const ConstGraphNodes* p) noexcept override { return p->empty(); } + // OpKernel (direct) const Node& OpKernel__Node(const OpKernel* p) override { return p->OpKernel::Node(); } @@ -1428,9 +1559,7 @@ struct ProviderHostImpl : ProviderHost { training::DistributedRunContext& GetDistributedRunContextInstance() override { return training::DistributedRunContext::GetInstance(); } #endif -#if defined(USE_CUDA) || defined(USE_ROCM) PhiloxGenerator& PhiloxGenerator__Default() override { return PhiloxGenerator::Default(); } -#endif #ifdef ENABLE_TRAINING_TORCH_INTEROP void contrib__PythonOpBase__Init(contrib::PythonOpBase* p, const OpKernelInfo& info) override { p->PythonOpBase::Init(info); } @@ -1651,6 +1780,9 @@ static ProviderLibrary s_library_tensorrt(LIBRARY_PREFIX ORT_TSTR("onnxruntime_p ); static ProviderLibrary s_library_migraphx(LIBRARY_PREFIX ORT_TSTR("onnxruntime_providers_migraphx") LIBRARY_EXTENSION); +// QNN EP can be built either as a static library or a shared library. Can safely define s_library_qnn even if static.
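+// Presumably safe because ProviderLibrary defers loading the shared library until first use, so a statically linked QNN EP simply never touches this entry.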
+static ProviderLibrary s_library_qnn(LIBRARY_PREFIX ORT_TSTR("onnxruntime_providers_qnn") LIBRARY_EXTENSION); + void UnloadSharedProviders() { s_library_dnnl.Unload(); s_library_vitisai.Unload(); @@ -1662,6 +1794,7 @@ void UnloadSharedProviders() { s_library_rocm.Unload(); s_library_shared.Unload(); s_library_migraphx.Unload(); + s_library_qnn.Unload(); } // Used by test code @@ -1832,6 +1965,20 @@ ProviderOptions OrtOpenVINOProviderOptionsToOrtOpenVINOProviderOptionsV2(const O return ov_options_converted_map; } +#if !BUILD_QNN_EP_STATIC_LIB +std::shared_ptr<IExecutionProviderFactory> QNNProviderFactoryCreator::Create(const ProviderOptions& provider_options_map, + const SessionOptions* session_options) { + const ConfigOptions* config_options = nullptr; + if (session_options != nullptr) { + config_options = &session_options->config_options; + } + + std::array<const void*, 2> configs_array = {&provider_options_map, config_options}; + const void* arg = reinterpret_cast<const void*>(&configs_array); + return s_library_qnn.Get().CreateExecutionProviderFactory(arg); +} +#endif // !BUILD_QNN_EP_STATIC_LIB + std::shared_ptr<IExecutionProviderFactory> OpenVINOProviderFactoryCreator::Create( const ProviderOptions* provider_options_map, const SessionOptions* session_options) { // Append session options applicable for EP to EP Provider options. diff --git a/onnxruntime/python/backend/backend.py b/onnxruntime/python/backend/backend.py index 67423fe9b5a33..19f46189e2933 100644 --- a/onnxruntime/python/backend/backend.py +++ b/onnxruntime/python/backend/backend.py @@ -5,6 +5,7 @@ """ Implements ONNX's backend API. """ + import os import unittest diff --git a/onnxruntime/python/backend/backend_rep.py b/onnxruntime/python/backend/backend_rep.py index c4dddaaba1378..a30569d004d34 100644 --- a/onnxruntime/python/backend/backend_rep.py +++ b/onnxruntime/python/backend/backend_rep.py @@ -5,7 +5,6 @@ """ Implements ONNX's backend API. """ -from typing import Any, Tuple # noqa: F401 from onnx.backend.base import BackendRep diff --git a/onnxruntime/python/datasets/__init__.py b/onnxruntime/python/datasets/__init__.py index ba64aa8a6e159..1a04b37698944 100644 --- a/onnxruntime/python/datasets/__init__.py +++ b/onnxruntime/python/datasets/__init__.py @@ -3,6 +3,7 @@ """ Short examples used in the documentation. """ + import os diff --git a/onnxruntime/python/onnxruntime_inference_collection.py b/onnxruntime/python/onnxruntime_inference_collection.py index d05fba192820a..6b5f7526cc506 100644 --- a/onnxruntime/python/onnxruntime_inference_collection.py +++ b/onnxruntime/python/onnxruntime_inference_collection.py @@ -9,7 +9,8 @@ import os import typing import warnings -from typing import Any, Sequence +from collections.abc import Sequence +from typing import Any from onnxruntime.capi import _pybind_state as C @@ -115,8 +116,9 @@ def check_and_normalize_provider_args( def set_provider_options(name, options): if name not in available_provider_names: warnings.warn( - "Specified provider '{}' is not in available provider names."
- "Available providers: '{}'".format(name, ", ".join(available_provider_names)) + "Specified provider '{}' is not in available provider names.Available providers: '{}'".format( + name, ", ".join(available_provider_names) + ) ) if name in provider_name_to_options: @@ -136,19 +138,19 @@ def set_provider_options(name, options): if len(providers) != len(provider_options): raise ValueError("'providers' and 'provider_options' should be the same length if both are given.") - if not all([isinstance(provider, str) for provider in providers]): + if not all(isinstance(provider, str) for provider in providers): raise ValueError("Only string values for 'providers' are supported if 'provider_options' is given.") - if not all([isinstance(options_for_provider, dict) for options_for_provider in provider_options]): + if not all(isinstance(options_for_provider, dict) for options_for_provider in provider_options): raise ValueError("'provider_options' values must be dicts.") - for name, options in zip(providers, provider_options): + for name, options in zip(providers, provider_options, strict=False): set_provider_options(name, options) else: for provider in providers: if isinstance(provider, str): - set_provider_options(provider, dict()) + set_provider_options(provider, {}) elif ( isinstance(provider, tuple) and len(provider) == 2 diff --git a/onnxruntime/python/onnxruntime_validation.py b/onnxruntime/python/onnxruntime_validation.py index 4f29c7f424845..09ce886c8f6a2 100644 --- a/onnxruntime/python/onnxruntime_validation.py +++ b/onnxruntime/python/onnxruntime_validation.py @@ -5,6 +5,7 @@ """ Check OS requirements for ONNX Runtime Python Bindings. """ + import linecache import platform import warnings diff --git a/onnxruntime/python/tools/custom_op_wrapper/create_custom_op_wrapper.py b/onnxruntime/python/tools/custom_op_wrapper/create_custom_op_wrapper.py index e0967ef5545db..76238b982fd96 100644 --- a/onnxruntime/python/tools/custom_op_wrapper/create_custom_op_wrapper.py +++ b/onnxruntime/python/tools/custom_op_wrapper/create_custom_op_wrapper.py @@ -22,7 +22,6 @@ import os import sys from dataclasses import dataclass -from typing import List, Optional, Union import onnx from onnx import TensorProto, helper @@ -65,7 +64,7 @@ class IOInfo: index: int name: str elem_type: TensorProto.DataType - shape: Optional[List[Union[int, str]]] + shape: list[int | str] | None def str_is_int(string: str) -> bool: @@ -76,7 +75,7 @@ def str_is_int(string: str) -> bool: return False -def parse_shape(shape_str: str) -> Optional[List[Union[int, str]]]: +def parse_shape(shape_str: str) -> list[int | str] | None: try: shape = [int(s) if str_is_int(s) else s for s in shape_str.split(",")] except ValueError: @@ -204,7 +203,7 @@ def parse_arguments() -> argparse.Namespace: return parser.parse_args() -def get_attributes(attr_data_info: List[List[str]]): +def get_attributes(attr_data_info: list[list[str]]): if not attr_data_info: return {} diff --git a/onnxruntime/python/tools/kernel_explorer/kernels/kernel_explorer.py b/onnxruntime/python/tools/kernel_explorer/kernels/kernel_explorer.py index 66e1a8052ce84..8be8481fd1394 100644 --- a/onnxruntime/python/tools/kernel_explorer/kernels/kernel_explorer.py +++ b/onnxruntime/python/tools/kernel_explorer/kernels/kernel_explorer.py @@ -13,11 +13,11 @@ import sys from abc import abstractmethod from argparse import Action, ArgumentParser +from collections.abc import Callable from contextlib import contextmanager from dataclasses import dataclass from fnmatch import fnmatch from functools 
import wraps -from typing import Callable build_dir = os.environ.get("KERNEL_EXPLORER_BUILD_DIR", None) if build_dir is None: @@ -220,7 +220,7 @@ def set_dispatch(name): from difflib import SequenceMatcher as Matcher valid_names = list(_ke_context.dispatchable.keys()) - scored_names = list(reversed(sorted([(Matcher(None, name, a).ratio(), a) for a in valid_names]))) + scored_names = sorted([(Matcher(None, name, a).ratio(), a) for a in valid_names], reverse=True) top10 = "\n ".join([a for _, a in scored_names[:10]]) msg = f"'{name}' is not registered for dispatch. Top 10 matches are:\n {top10}" print(msg) diff --git a/onnxruntime/python/tools/offline_tuning.py b/onnxruntime/python/tools/offline_tuning.py index c032685b70f7c..c55b515814a28 100644 --- a/onnxruntime/python/tools/offline_tuning.py +++ b/onnxruntime/python/tools/offline_tuning.py @@ -7,11 +7,11 @@ import sys from collections import OrderedDict from pprint import pprint -from typing import Any, Dict, List +from typing import Any import onnx -TuningResults = Dict[str, Any] +TuningResults = dict[str, Any] _TUNING_RESULTS_KEY = "tuning_results" @@ -32,7 +32,7 @@ def extract(model: onnx.ModelProto): return json.loads(tuning_results_prop.value) -def embed(model: onnx.ModelProto, tuning_results: List[TuningResults], overwrite=False): +def embed(model: onnx.ModelProto, tuning_results: list[TuningResults], overwrite=False): idx = _find_tuning_results_in_props(model.metadata_props) assert overwrite or idx <= 0, "the supplied onnx file already have tuning results embedded!" @@ -47,7 +47,7 @@ def embed(model: onnx.ModelProto, tuning_results: List[TuningResults], overwrite class Merger: class EpAndValidators: - def __init__(self, ep: str, validators: Dict[str, str]): + def __init__(self, ep: str, validators: dict[str, str]): self.ep = ep self.validators = copy.deepcopy(validators) self.key = (ep, tuple(sorted(validators.items()))) @@ -61,7 +61,7 @@ def __eq__(self, other): def __init__(self): self.ev_to_results = OrderedDict() - def merge(self, tuning_results: List[TuningResults]): + def merge(self, tuning_results: list[TuningResults]): for trs in tuning_results: self._merge_one(trs) diff --git a/onnxruntime/python/tools/profile_explorer/profile_explorer.py b/onnxruntime/python/tools/profile_explorer/profile_explorer.py index 6e0747883989f..42db05b7cc69d 100644 --- a/onnxruntime/python/tools/profile_explorer/profile_explorer.py +++ b/onnxruntime/python/tools/profile_explorer/profile_explorer.py @@ -86,7 +86,7 @@ def _shape_to_string(shape): value = next(iter(dict_obj.values())) if len(res) != 0: res += "," - res += f'{key}({"x".join(str(v) for v in value)})' + res += f"{key}({'x'.join(str(v) for v in value)})" return res @@ -200,7 +200,7 @@ def _print_op_kernel_mapping_info(cpu_df, gpu_df, num_runs, csv=None): # Count op occurrences in the selected runs op_counts = defaultdict(int) for op in cpu_df.T.to_dict().values(): - identifiers = tuple([op["name"], op["input_type_shape"]]) + identifiers = (op["name"], op["input_type_shape"]) op_counts[identifiers] += 1 # Collect kernel stats: count/duration @@ -212,7 +212,7 @@ def _print_op_kernel_mapping_info(cpu_df, gpu_df, num_runs, csv=None): input_type_shape = kernel["input_type_shape"] kernel_name = kernel["name"] dimensions = kernel["dimensions"] - identifiers = tuple([op_name, input_type_shape, kernel_name, dimensions]) + identifiers = (op_name, input_type_shape, kernel_name, dimensions) stat_dict[identifiers]["count"] += 1 stat_dict[identifiers]["duration"] += kernel["duration"] @@ -220,7 
+220,7 @@ def _print_op_kernel_mapping_info(cpu_df, gpu_df, num_runs, csv=None): kernel_list = [] for identifiers, stat in stat_dict.items(): op_name, input_type_shape, kernel_name, dimensions = identifiers - op_count = op_counts.get(tuple([op_name, input_type_shape])) + op_count = op_counts.get((op_name, input_type_shape)) if op_count is None: continue kernel_list.append( diff --git a/onnxruntime/python/tools/pytorch_export_contrib_ops.py b/onnxruntime/python/tools/pytorch_export_contrib_ops.py index d8cf3c1304219..f3cd4c2c89801 100644 --- a/onnxruntime/python/tools/pytorch_export_contrib_ops.py +++ b/onnxruntime/python/tools/pytorch_export_contrib_ops.py @@ -5,6 +5,7 @@ Support for registering ONNX Runtime's built-in contrib ops with PyTorch-ONNX exporter (torch.onnx.export). """ + import typing try: diff --git a/onnxruntime/python/tools/qnn/add_trans_cast.py b/onnxruntime/python/tools/qnn/add_trans_cast.py index ced3e3519ad42..edeaa6b4e28d0 100644 --- a/onnxruntime/python/tools/qnn/add_trans_cast.py +++ b/onnxruntime/python/tools/qnn/add_trans_cast.py @@ -126,9 +126,9 @@ def parse_qnn_json_file(qnn_json_file_path, qnn_input_output_tensor_dic): qnn_tensor.dim = qnn_tensor_attribute["dims"] qnn_input_output_tensor_dic[qnn_tensor_name] = qnn_tensor - assert ( - len(qnn_input_output_tensor_dic) > 1 - ), "Converted QNN model not valid. It should have at least 1 input & 1 output." + assert len(qnn_input_output_tensor_dic) > 1, ( + "Converted QNN model not valid. It should have at least 1 input & 1 output." + ) def compare_onnx_shape_with_qnn_shape(onnx_dims, qnn_dims): diff --git a/onnxruntime/python/tools/qnn/gen_qnn_ctx_onnx_model.py b/onnxruntime/python/tools/qnn/gen_qnn_ctx_onnx_model.py index b7d32fd6b2353..7a3e364a08cfd 100644 --- a/onnxruntime/python/tools/qnn/gen_qnn_ctx_onnx_model.py +++ b/onnxruntime/python/tools/qnn/gen_qnn_ctx_onnx_model.py @@ -150,9 +150,9 @@ def parse_qnn_converter_json_file(qnn_convert_json, qnn_input_tensor_dic, qnn_ou qnn_tensor.offset = 0 - qnn_tensor_attribute["quant_params"]["scale_offset"]["offset"] qnn_output_tensor_dic[qnn_tensor_name] = qnn_tensor - assert ( - len(qnn_input_tensor_dic) >= 1 and len(qnn_output_tensor_dic) >= 1 - ), "Converted QNN model not valid. It should have at least 1 input & 1 output." + assert len(qnn_input_tensor_dic) >= 1 and len(qnn_output_tensor_dic) >= 1, ( + "Converted QNN model not valid. It should have at least 1 input & 1 output." + ) def generate_wrapper_onnx_file( @@ -286,9 +286,9 @@ def parse_qnn_graph(qnn_graph, qnn_input_tensor_dic, qnn_output_tensor_dic): qnn_tensor.offset = 0 - tensor_info["quantizeParams"]["scaleOffset"]["offset"] qnn_output_tensor_dic[qnn_tensor.name] = qnn_tensor - assert ( - len(qnn_input_tensor_dic) >= 1 and len(qnn_output_tensor_dic) >= 1 - ), "Converted QNN model not valid. It should have at least 1 input & 1 output." + assert len(qnn_input_tensor_dic) >= 1 and len(qnn_output_tensor_dic) >= 1, ( + "Converted QNN model not valid. It should have at least 1 input & 1 output." 
+ ) return graph_name diff --git a/onnxruntime/python/tools/quantization/__init__.py b/onnxruntime/python/tools/quantization/__init__.py index 712e15a6a1ca9..ac99de348f612 100644 --- a/onnxruntime/python/tools/quantization/__init__.py +++ b/onnxruntime/python/tools/quantization/__init__.py @@ -7,11 +7,13 @@ ) from .qdq_quantizer import QDQQuantizer # noqa: F401 from .quant_utils import QuantFormat, QuantType, write_calibration_table # noqa: F401 -from .quantize import DynamicQuantConfig # noqa: F401 -from .quantize import QuantizationMode # noqa: F401 -from .quantize import StaticQuantConfig # noqa: F401 -from .quantize import get_qdq_config # noqa: F401 -from .quantize import quantize # noqa: F401 -from .quantize import quantize_dynamic # noqa: F401 -from .quantize import quantize_static # noqa: F401 +from .quantize import ( + DynamicQuantConfig, # noqa: F401 + QuantizationMode, # noqa: F401 + StaticQuantConfig, # noqa: F401 + get_qdq_config, # noqa: F401 + quantize, # noqa: F401 + quantize_dynamic, # noqa: F401 + quantize_static, # noqa: F401 +) from .shape_inference import quant_pre_process # noqa: F401 diff --git a/onnxruntime/python/tools/quantization/base_quantizer.py b/onnxruntime/python/tools/quantization/base_quantizer.py index 6235db3234d49..7bf8b2846d73b 100644 --- a/onnxruntime/python/tools/quantization/base_quantizer.py +++ b/onnxruntime/python/tools/quantization/base_quantizer.py @@ -4,7 +4,7 @@ # license information. # -------------------------------------------------------------------------- import logging -from typing import Any, Dict +from typing import Any import numpy as np import onnx @@ -36,7 +36,7 @@ class QuantizationParams: - def __init__(self, **data: Dict[str, Any]): + def __init__(self, **data: dict[str, Any]): self.data = {} for k, v in data.items(): if not isinstance(k, str): @@ -118,9 +118,9 @@ def __init__( 'Conv_4:0': [np.float32(1), np.float32(3.5)] } """ - if tensors_range is not None and any(map(lambda t: not isinstance(t, TensorData), tensors_range.values())): + if tensors_range is not None and any(not isinstance(t, TensorData) for t in tensors_range.values()): raise TypeError( - f"tensors_range contains unexpected types {set(type(v) for v in tensors_range.values())}, not TensorData." + f"tensors_range contains unexpected types { {type(v) for v in tensors_range.values()} }, not TensorData." 
) self.tensors_range = tensors_range self.nodes_to_quantize = nodes_to_quantize # specific nodes to quantize @@ -331,9 +331,9 @@ def quantize_initializer_impl(self, weight, qType, reduce_range=False, keep_floa scale = np.array(quant_overrides["scale"]) q_weight_data = quantize_nparray(qType, weight_data.flatten(), scale, zero_point) assert isinstance(zero_point, np.ndarray), f"Unexpected type {type(zero_point)}" - assert ( - zero_point.dtype != np.float32 and zero_point.dtype != np.float16 - ), f"Unexpected dtype {zero_point.dtype}" + assert zero_point.dtype != np.float32 and zero_point.dtype != np.float16, ( + f"Unexpected dtype {zero_point.dtype}" + ) assert isinstance(scale, np.ndarray), f"Unexpected type {type(scale)}" else: @@ -349,9 +349,9 @@ def quantize_initializer_impl(self, weight, qType, reduce_range=False, keep_floa ) assert isinstance(zero_point, np.ndarray), f"Unexpected type {type(zero_point)}" - assert ( - zero_point.dtype != np.float32 and zero_point.dtype != np.float16 - ), f"Unexpected dtype {zero_point.dtype}" + assert zero_point.dtype != np.float32 and zero_point.dtype != np.float16, ( + f"Unexpected dtype {zero_point.dtype}" + ) assert isinstance(scale, np.ndarray), f"Unexpected type {type(scale)}" scale_dtype = weight.data_type @@ -465,13 +465,13 @@ def quantize_weight_per_channel_impl( weight_qType, per_channel_data.flatten(), scale, zero_point ) assert isinstance(zero_point, np.ndarray), f"Unexpected type {type(zero_point)}" - assert ( - zero_point.dtype != np.float32 and zero_point.dtype != np.float16 - ), f"Unexpected dtype {zero_point.dtype}" + assert zero_point.dtype != np.float32 and zero_point.dtype != np.float16, ( + f"Unexpected dtype {zero_point.dtype}" + ) assert isinstance(scale, np.ndarray), f"Unexpected type {type(scale)}" - assert isinstance( - quantized_per_channel_data, np.ndarray - ), f"Unexpected type {type(quantized_per_channel_data)}" + assert isinstance(quantized_per_channel_data, np.ndarray), ( + f"Unexpected type {type(quantized_per_channel_data)}" + ) else: zero_point, scale, quantized_per_channel_data = quantize_data( @@ -485,13 +485,13 @@ def quantize_weight_per_channel_impl( ) assert isinstance(zero_point, np.ndarray), f"Unexpected type {type(zero_point)}" - assert ( - zero_point.dtype != np.float32 and zero_point.dtype != np.float16 - ), f"Unexpected dtype {zero_point.dtype}" + assert zero_point.dtype != np.float32 and zero_point.dtype != np.float16, ( + f"Unexpected dtype {zero_point.dtype}" + ) assert isinstance(scale, np.ndarray), f"Unexpected type {type(scale)}" - assert isinstance( - quantized_per_channel_data, np.ndarray - ), f"Unexpected type {type(quantized_per_channel_data)}" + assert isinstance(quantized_per_channel_data, np.ndarray), ( + f"Unexpected type {type(quantized_per_channel_data)}" + ) zero_point_list.append(zero_point) scale_list.append(scale) diff --git a/onnxruntime/python/tools/quantization/calibrate.py b/onnxruntime/python/tools/quantization/calibrate.py index 533b2197bf30c..4d5dbcc06ddc6 100644 --- a/onnxruntime/python/tools/quantization/calibrate.py +++ b/onnxruntime/python/tools/quantization/calibrate.py @@ -9,9 +9,9 @@ import itertools import os import uuid +from collections.abc import Sequence from enum import Enum from pathlib import Path -from typing import Dict, Optional, Sequence, Tuple, Union import numpy as np import onnx @@ -39,7 +39,7 @@ def rel_entr(pk: np.ndarray, qk: np.ndarray) -> np.ndarray: def entropy( pk: np.ndarray, qk: np.ndarray, - base: Optional[float] = None, + base: float | None = None, 
axis: int = 0, ) -> np.ndarray: """ @@ -100,7 +100,7 @@ def to_dict(self): class TensorsData: - def __init__(self, calibration_method, data: Dict[str, Union[TensorData, Tuple]]): + def __init__(self, calibration_method, data: dict[str, TensorData | tuple]): self.calibration_method = calibration_method self.data = {} for k, v in data.items(): @@ -161,7 +161,7 @@ class CalibrationMethod(Enum): class CalibrationDataReader(metaclass=abc.ABCMeta): @classmethod def __subclasshook__(cls, subclass): - return hasattr(subclass, "get_next") and callable(subclass.get_next) or NotImplemented + return (hasattr(subclass, "get_next") and callable(subclass.get_next)) or NotImplemented @abc.abstractmethod def get_next(self) -> dict: @@ -187,8 +187,8 @@ def set_range(self, start_index: int, end_index: int): class CalibraterBase: def __init__( self, - model_path: Union[str, Path], - op_types_to_calibrate: Optional[Sequence[str]] = None, + model_path: str | Path, + op_types_to_calibrate: Sequence[str] | None = None, augmented_model_path="augmented_model.onnx", symmetric=False, use_external_data_format=False, @@ -297,8 +297,8 @@ def compute_data(self) -> TensorsData: class MinMaxCalibrater(CalibraterBase): def __init__( self, - model_path: Union[str, Path], - op_types_to_calibrate: Optional[Sequence[str]] = None, + model_path: str | Path, + op_types_to_calibrate: Sequence[str] | None = None, augmented_model_path="augmented_model.onnx", symmetric=False, use_external_data_format=False, @@ -476,7 +476,8 @@ def compute_data(self) -> TensorsData: output_names = [self.infer_session.get_outputs()[i].name for i in range(len(self.intermediate_outputs[0]))] output_dicts_list = [ - dict(zip(output_names, intermediate_output)) for intermediate_output in self.intermediate_outputs + dict(zip(output_names, intermediate_output, strict=False)) + for intermediate_output in self.intermediate_outputs ] merged_output_dict = {} @@ -503,11 +504,13 @@ def compute_data(self) -> TensorsData: if self.symmetric: max_absolute_value = np.max([np.abs(min_value_array), np.abs(max_value_array)], axis=0) - pairs.append(tuple([-max_absolute_value, max_absolute_value])) + pairs.append((-max_absolute_value, max_absolute_value)) else: - pairs.append(tuple([min_value_array, max_value_array])) + pairs.append((min_value_array, max_value_array)) - new_calibrate_tensors_range = TensorsData(CalibrationMethod.MinMax, dict(zip(calibrate_tensor_names, pairs))) + new_calibrate_tensors_range = TensorsData( + CalibrationMethod.MinMax, dict(zip(calibrate_tensor_names, pairs, strict=False)) + ) if self.calibrate_tensors_range: self.calibrate_tensors_range = self.merge_range(self.calibrate_tensors_range, new_calibrate_tensors_range) else: @@ -519,8 +522,8 @@ def compute_data(self) -> TensorsData: class HistogramCalibrater(CalibraterBase): def __init__( self, - model_path: Union[str, Path], - op_types_to_calibrate: Optional[Sequence[str]] = None, + model_path: str | Path, + op_types_to_calibrate: Sequence[str] | None = None, augmented_model_path="augmented_model.onnx", use_external_data_format=False, method="percentile", @@ -608,7 +611,8 @@ def collect_data(self, data_reader: CalibrationDataReader): raise ValueError("No data is collected.") output_dicts_list = [ - dict(zip(output_names, intermediate_output)) for intermediate_output in self.intermediate_outputs + dict(zip(output_names, intermediate_output, strict=False)) + for intermediate_output in self.intermediate_outputs ] merged_dict = {} @@ -653,8 +657,8 @@ def compute_data(self) -> TensorsData: class 
EntropyCalibrater(HistogramCalibrater): def __init__( self, - model_path: Union[str, Path], - op_types_to_calibrate: Optional[Sequence[str]] = None, + model_path: str | Path, + op_types_to_calibrate: Sequence[str] | None = None, augmented_model_path="augmented_model.onnx", use_external_data_format=False, method="entropy", @@ -687,8 +691,8 @@ def __init__( class PercentileCalibrater(HistogramCalibrater): def __init__( self, - model_path: Union[str, Path], - op_types_to_calibrate: Optional[Sequence[str]] = None, + model_path: str | Path, + op_types_to_calibrate: Sequence[str] | None = None, augmented_model_path="augmented_model.onnx", use_external_data_format=False, method="percentile", @@ -721,8 +725,8 @@ def __init__( class DistributionCalibrater(HistogramCalibrater): def __init__( self, - model_path: Union[str, Path], - op_types_to_calibrate: Optional[Sequence[str]] = None, + model_path: str | Path, + op_types_to_calibrate: Sequence[str] | None = None, augmented_model_path="augmented_model.onnx", use_external_data_format=False, method="distribution", @@ -819,10 +823,10 @@ def collect_absolute_value(self, name_to_arr): if isinstance(data_arr, list): for arr in data_arr: assert isinstance(arr, np.ndarray), f"Unexpected type {type(arr)} for tensor={tensor!r}" - dtypes = set(a.dtype for a in data_arr) - assert ( - len(dtypes) == 1 - ), f"The calibration expects only one element type but got {dtypes} for tensor={tensor!r}" + dtypes = {a.dtype for a in data_arr} + assert len(dtypes) == 1, ( + f"The calibration expects only one element type but got {dtypes} for tensor={tensor!r}" + ) data_arr_np = np.asarray(data_arr) elif not isinstance(data_arr, np.ndarray): raise ValueError(f"Unexpected type {type(data_arr)} for tensor={tensor!r}") @@ -842,9 +846,9 @@ def collect_absolute_value(self, name_to_arr): # first time it uses num_bins to compute histogram. 
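The calibrate.py hunks above and below consistently swap typing.Optional[X]/typing.Union[X, Y] for PEP 604 unions (X | None) and use PEP 585 built-in generics (dict, tuple) in annotations. A minimal sketch of the two spellings, using a hypothetical load helper that is not part of this patch; the new syntax needs Python >= 3.10 when annotations are evaluated, or deferred annotations as in the build_image.py hunk further down:

from pathlib import Path

def load(model_path: str | Path, tag: str | None = None) -> dict[str, str | None]:
    # Hypothetical helper, not from this patch. The old spelling would be:
    #   def load(model_path: Union[str, Path], tag: Optional[str] = None) -> Dict[str, Optional[str]]:
    return {"path": str(model_path), "tag": tag}

print(load("model.onnx"))  # {'path': 'model.onnx', 'tag': None}
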
hist, hist_edges = np.histogram(data_arr_np, bins=self.num_bins) hist_edges = hist_edges.astype(data_arr_np.dtype) - assert ( - data_arr_np.dtype != np.float64 - ), "only float32 or float16 is supported, every constant must be explicitly typed" + assert data_arr_np.dtype != np.float64, ( + "only float32 or float16 is supported, every constant must be explicitly typed" + ) self.histogram_dict[tensor] = (hist, hist_edges, min_value, max_value) else: old_histogram = self.histogram_dict[tensor] @@ -864,9 +868,9 @@ def collect_absolute_value(self, name_to_arr): hist, hist_edges = np.histogram(data_arr_np, bins=old_hist_edges) hist_edges = hist_edges.astype(data_arr_np.dtype) hist[: len(old_hist)] += old_hist - assert ( - data_arr_np.dtype != np.float64 - ), "only float32 or float16 is supported, every constant must be explicitly typed" + assert data_arr_np.dtype != np.float64, ( + "only float32 or float16 is supported, every constant must be explicitly typed" + ) self.histogram_dict[tensor] = (hist, hist_edges, min(old_min, min_value), max(old_max, max_value)) def collect_value(self, name_to_arr): @@ -1168,8 +1172,8 @@ def get_entropy_threshold(self, histogram, num_quantized_bins): def create_calibrator( - model: Union[str, Path], - op_types_to_calibrate: Optional[Sequence[str]] = None, + model: str | Path, + op_types_to_calibrate: Sequence[str] | None = None, augmented_model_path="augmented_model.onnx", calibrate_method=CalibrationMethod.MinMax, use_external_data_format=False, diff --git a/onnxruntime/python/tools/quantization/execution_providers/qnn/mixed_precision_overrides_utils.py b/onnxruntime/python/tools/quantization/execution_providers/qnn/mixed_precision_overrides_utils.py index 6396e87c73d03..fe1192d2dd119 100644 --- a/onnxruntime/python/tools/quantization/execution_providers/qnn/mixed_precision_overrides_utils.py +++ b/onnxruntime/python/tools/quantization/execution_providers/qnn/mixed_precision_overrides_utils.py @@ -178,7 +178,7 @@ def apply( # Use type requests to "fix" tensor quantization overrides by adding # quantization type conversions where necessary. 
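A pattern repeated throughout this patch: the formatter (this looks like Ruff's newer assert style, though the diff itself does not say which tool produced it) moves the wrapping parentheses from the assert condition to the assert message. Both forms are equivalent at runtime; a sketch with hypothetical values:

import numpy as np

zero_point = np.array([0], dtype=np.int8)

# Style the '-' lines remove: condition wrapped, message trailing.
assert (
    zero_point.dtype != np.float32 and zero_point.dtype != np.float16
), f"Unexpected dtype {zero_point.dtype}"

# Style the '+' lines add: condition on the assert line, message wrapped.
assert zero_point.dtype != np.float32 and zero_point.dtype != np.float16, (
    f"Unexpected dtype {zero_point.dtype}"
)

The second form keeps the tested expression on the assert line itself, which is the line that shows up in tracebacks.
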
for tensor_name, type_req in type_requests.items(): - all_consumers = set([node.name for node in self.consumers.get(tensor_name, [])]) + all_consumers = {node.name for node in self.consumers.get(tensor_name, [])} has_producer_req = type_req.producer is not None has_consumer_req = bool(type_req.consumers) diff --git a/onnxruntime/python/tools/quantization/matmul_4bits_quantizer.py b/onnxruntime/python/tools/quantization/matmul_4bits_quantizer.py index 1d91141a117ad..b4ee5074754dc 100644 --- a/onnxruntime/python/tools/quantization/matmul_4bits_quantizer.py +++ b/onnxruntime/python/tools/quantization/matmul_4bits_quantizer.py @@ -1186,7 +1186,7 @@ def _generate_q4_node_config(self): } for node in self.model.model.graph.node: if node.op_type in ["MatMul"]: - if not all([self.model.get_initializer(i) is None for i in node.input]): + if not all(self.model.get_initializer(i) is None for i in node.input): q4_node_config[node.name] = template_config_q4 return q4_node_config @@ -1259,7 +1259,6 @@ def process(self): self._process_subgraph(graph_stack) self.model.clean_initializers() elif self.algo_config.algorithm == "nvidia_awq": - # Handle nvidia_awq quantization logger.info("Processing nvidia_awq quantization...") self.model = self.node_quantizer.quantize_awq( @@ -1280,9 +1279,9 @@ def process(self): import neural_compressor - assert version.parse(neural_compressor.__version__) >= version.parse( - "2.3.2" - ), "Require neural-compressor >= 2.3.2 to support weight only quantization!" + assert version.parse(neural_compressor.__version__) >= version.parse("2.3.2"), ( + "Require neural-compressor >= 2.3.2 to support weight only quantization!" + ) self.int4_quant_algo() @@ -1446,7 +1445,6 @@ def parse_args(): elif args.quant_method == "gptq": quant_config = GPTQWeightOnlyQuantConfig(block_size=args.block_size, op_types_to_quantize=op_types_to_quantize) elif args.quant_method == "nvidia_awq": - if quant_format == QuantFormat.QOperator: logger.warning("QOperator is not applicable to nvidia_awq. 
overriding the value to QDQ") quant_format = QuantFormat.QDQ diff --git a/onnxruntime/python/tools/quantization/matmul_bnb4_quantizer.py b/onnxruntime/python/tools/quantization/matmul_bnb4_quantizer.py index 2bf47fe1680e9..2e8ee11e2f864 100644 --- a/onnxruntime/python/tools/quantization/matmul_bnb4_quantizer.py +++ b/onnxruntime/python/tools/quantization/matmul_bnb4_quantizer.py @@ -7,7 +7,6 @@ import argparse import logging import os -from typing import List, Tuple import numpy as np import numpy.typing as npt @@ -44,7 +43,7 @@ def __init__(self, model: ModelProto, quant_type: int, block_size: int, nodes_to self.nodes_to_exclude = set(nodes_to_exclude) @staticmethod - def __get_initializer(name, graph_path: List[GraphProto]) -> Tuple[TensorProto, GraphProto]: + def __get_initializer(name, graph_path: list[GraphProto]) -> tuple[TensorProto, GraphProto]: for gid in range(len(graph_path) - 1, -1, -1): graph = graph_path[gid] for tensor in graph.initializer: @@ -74,7 +73,7 @@ def bnb4_block_quant(self, fpweight: npt.ArrayLike) -> np.ndarray: return (packed, absmax) - def _bnb4_matmul_node_weight(self, node: NodeProto, graph_stack: List[GraphProto]) -> NodeProto: + def _bnb4_matmul_node_weight(self, node: NodeProto, graph_stack: list[GraphProto]) -> NodeProto: """If the node is MatMul with fp32 const weight, quantize the weight with int4, and return the new node""" if node.op_type != "MatMul": @@ -129,7 +128,7 @@ def _bnb4_matmul_node_weight(self, node: NodeProto, graph_stack: List[GraphProto return matmul_bnb4_node - def _process_subgraph(self, graph_stack: List[GraphProto]): + def _process_subgraph(self, graph_stack: list[GraphProto]): new_nodes = [] graph = graph_stack[-1] diff --git a/onnxruntime/python/tools/quantization/onnx_model.py b/onnxruntime/python/tools/quantization/onnx_model.py index 43105550139de..4e3ef5febf382 100644 --- a/onnxruntime/python/tools/quantization/onnx_model.py +++ b/onnxruntime/python/tools/quantization/onnx_model.py @@ -576,7 +576,7 @@ def _check_init(self, init, test=None): if init.data_type == onnx.TensorProto.FLOAT8E4M3FN: if init.HasField("raw_data"): b = list(init.raw_data) - if any(map(lambda i: (i & 127) == 127, b)): + if any((i & 127) == 127 for i in b): raise ValueError(f"Initializer {init.name!r} has nan.") return init diff --git a/onnxruntime/python/tools/quantization/operators/conv.py b/onnxruntime/python/tools/quantization/operators/conv.py index 922884a5f6383..7c5248f90f813 100644 --- a/onnxruntime/python/tools/quantization/operators/conv.py +++ b/onnxruntime/python/tools/quantization/operators/conv.py @@ -158,7 +158,9 @@ def quantize(self): nodes, ) = self.quantizer.quantize_activation(node, [0]) quant_weight_tuple = self.quantizer.quantize_weight_per_channel( - node.input[1], onnx_proto.TensorProto.INT8, 0 # self.quantizer.weight_qType? + node.input[1], + onnx_proto.TensorProto.INT8, + 0, # self.quantizer.weight_qType? 
) quantized_input_names.append(quant_weight_tuple[0]) zero_point_names.append(quant_weight_tuple[1]) diff --git a/onnxruntime/python/tools/quantization/operators/gemm.py b/onnxruntime/python/tools/quantization/operators/gemm.py index 5d7bf6e2cd2d7..6b8a389824b2d 100644 --- a/onnxruntime/python/tools/quantization/operators/gemm.py +++ b/onnxruntime/python/tools/quantization/operators/gemm.py @@ -3,9 +3,15 @@ import numpy as np # noqa: F401 import onnx -from ..quant_utils import find_by_name # noqa: F401 -from ..quant_utils import get_mul_node # noqa: F401 -from ..quant_utils import TENSOR_NAME_QUANT_SUFFIX, QuantizedValue, QuantizedValueType, attribute_to_kwarg, ms_domain +from ..quant_utils import ( + TENSOR_NAME_QUANT_SUFFIX, + QuantizedValue, + QuantizedValueType, + attribute_to_kwarg, + find_by_name, # noqa: F401 + get_mul_node, # noqa: F401 + ms_domain, +) from .base_operator import QuantOperatorBase # noqa: F401 from .matmul import QOpMatMul from .qdq_base_operator import QDQOperatorBase diff --git a/onnxruntime/python/tools/quantization/operators/lstm.py b/onnxruntime/python/tools/quantization/operators/lstm.py index 3ad3147cb8db9..3a0c94aca67c1 100644 --- a/onnxruntime/python/tools/quantization/operators/lstm.py +++ b/onnxruntime/python/tools/quantization/operators/lstm.py @@ -47,10 +47,14 @@ def quantize(self): R.dims[0] = R_num_dir * R_4_hidden_size quant_input_weight_tuple = self.quantizer.quantize_weight_per_channel( - node.input[1], onnx_proto.TensorProto.INT8, 0 # self.quantizer.weight_qType? + node.input[1], + onnx_proto.TensorProto.INT8, + 0, # self.quantizer.weight_qType? ) quant_recurrent_weight_tuple = self.quantizer.quantize_weight_per_channel( - node.input[2], onnx_proto.TensorProto.INT8, 0 # self.quantizer.weight_qType? + node.input[2], + onnx_proto.TensorProto.INT8, + 0, # self.quantizer.weight_qType? ) W_quant_weight = model.get_initializer(quant_input_weight_tuple[0]) # noqa: N806 diff --git a/onnxruntime/python/tools/quantization/qdq_loss_debug.py b/onnxruntime/python/tools/quantization/qdq_loss_debug.py index f9ed844febe46..d5219468309a5 100644 --- a/onnxruntime/python/tools/quantization/qdq_loss_debug.py +++ b/onnxruntime/python/tools/quantization/qdq_loss_debug.py @@ -37,8 +37,8 @@ def get_next(self): import logging import math import time +from collections.abc import Callable, Sequence from pathlib import Path -from typing import Callable, Dict, List, Optional, Sequence, Union import numpy import onnx @@ -62,9 +62,9 @@ def get_next(self): def modify_model_output_intermediate_tensors( - input_model_path: Union[str, Path], - output_model_path: Union[str, Path], - op_types_for_saving: Optional[Sequence[str]] = None, + input_model_path: str | Path, + output_model_path: str | Path, + op_types_for_saving: Sequence[str] | None = None, save_as_external_data: bool = False, ) -> None: """Augment a given ONNX model to save node input/output tensors. @@ -116,8 +116,8 @@ def collect_activations( augmented_model: str, input_reader: CalibrationDataReader, session_options=None, - execution_providers: Optional[Sequence[str]] = None, -) -> Dict[str, List[numpy.ndarray]]: + execution_providers: Sequence[str] | None = None, +) -> dict[str, list[numpy.ndarray]]: """Run augmented model and collect activations tensors. 
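qdq_loss_debug.py here (like calibrate.py earlier) now imports Callable and Sequence from collections.abc rather than typing; the typing aliases have been deprecated since Python 3.9. A small sketch with a hypothetical apply_all helper:

from collections.abc import Callable, Sequence

def apply_all(funcs: Sequence[Callable[[int], int]], x: int) -> int:
    # Hypothetical helper, not from this patch; shows the imports used in annotations.
    for f in funcs:
        x = f(x)
    return x

assert apply_all([lambda v: v + 1, lambda v: v * 2], 3) == 8
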
Args: @@ -154,7 +154,7 @@ def collect_activations( output_dict = {} output_info = inference_session.get_outputs() for batch in intermediate_outputs: - for output, output_data in zip(output_info, batch): + for output, output_data in zip(output_info, batch, strict=False): if output.name.endswith(_TENSOR_SAVE_POSTFIX): output_name = output.name[:-_TENSOR_SAVE_POSTFIX_LEN] output_dict.setdefault(output_name, []).append(output_data) @@ -166,10 +166,10 @@ def collect_activations( def _add_pre_post_qdq_pair( - qdq_cmp: Dict[str, Dict[str, Sequence[numpy.ndarray]]], + qdq_cmp: dict[str, dict[str, Sequence[numpy.ndarray]]], activation_name: str, - pre_qdq_tensors: Optional[Sequence[numpy.ndarray]], - post_qdq_tensors: Optional[Sequence[numpy.ndarray]], + pre_qdq_tensors: Sequence[numpy.ndarray] | None, + post_qdq_tensors: Sequence[numpy.ndarray] | None, ) -> None: if post_qdq_tensors is not None and pre_qdq_tensors is not None: qdq_cmp[activation_name] = {} @@ -178,9 +178,9 @@ def _add_pre_post_qdq_pair( def create_activation_matching( - qdq_activations: Dict[str, Sequence[numpy.ndarray]], - float_activations: Optional[Dict[str, Sequence[numpy.ndarray]]] = None, -) -> Dict[str, Dict[str, Sequence[numpy.ndarray]]]: + qdq_activations: dict[str, Sequence[numpy.ndarray]], + float_activations: dict[str, Sequence[numpy.ndarray]] | None = None, +) -> dict[str, dict[str, Sequence[numpy.ndarray]]]: """Comparing activation values to help debugging accuracy loss due to quantization. This functions takes saved activations from the QDQ model and (optionally) the @@ -210,7 +210,7 @@ def create_activation_matching( ``` """ - qdq_cmp: Dict[str, Dict[str, Sequence[numpy.ndarray]]] = {} + qdq_cmp: dict[str, dict[str, Sequence[numpy.ndarray]]] = {} for tensor_name, tensors in qdq_activations.items(): if tensor_name.endswith(QUANT_INPUT_SUFFIX): pre_name = tensor_name[: -len(QUANT_INPUT_SUFFIX)] @@ -241,7 +241,7 @@ def create_activation_matching( def _run_dequantize_linear( weight_tensor: numpy.ndarray, weight_scale: numpy.ndarray, weight_zp: numpy.ndarray, channel_axis: int -) -> Optional[numpy.ndarray]: +) -> numpy.ndarray | None: assert weight_scale.shape == weight_zp.shape if weight_zp.size == 1: return (weight_tensor - weight_zp) * weight_scale @@ -267,7 +267,7 @@ def _run_dequantize_linear( return dequantized_weights -def create_weight_matching(float_model_path: str, qdq_model_path: str) -> Dict[str, Dict[str, numpy.ndarray]]: +def create_weight_matching(float_model_path: str, qdq_model_path: str) -> dict[str, dict[str, numpy.ndarray]]: """Comparing weight values to help debugging accuracy loss due to quantization. This functions takes the float model and the qdq model, and provides a data structure for comparing @@ -288,7 +288,7 @@ def create_weight_matching(float_model_path: str, qdq_model_path: str) -> Dict[s float_onnx_model = ONNXModel(load_model_with_shape_infer(Path(float_model_path))) qdq_onnx_model = ONNXModel(load_model_with_shape_infer(Path(qdq_model_path))) - matched_weights: Dict[str, Dict[str, numpy.ndarray]] = {} + matched_weights: dict[str, dict[str, numpy.ndarray]] = {} initializers = qdq_onnx_model.initializer() for node in qdq_onnx_model.nodes(): if node.op_type != DEQUANT_OP_NAME: @@ -316,8 +316,8 @@ def create_weight_matching(float_model_path: str, qdq_model_path: str) -> Dict[s # Perform dequantization: if weight_scale.size == weight_zp.size == 1: # Avoids the confusion between a scaler and a tensor of one element. 
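Many hunks in this patch (collect_activations just above, calibrate.py, symbolic_shape_infer.py below) pass strict=False to zip(). The keyword was added in Python 3.10 (PEP 618), and strict=False is the default behavior, so these edits only make the silent-truncation semantics explicit rather than changing them. A sketch with hypothetical values:

names = ["out_0", "out_1", "out_2"]
tensors = [1.0, 2.0]

# Default (and strict=False): stops quietly at the shortest input.
assert list(zip(names, tensors, strict=False)) == [("out_0", 1.0), ("out_1", 2.0)]

# strict=True would turn the length mismatch into an error instead:
try:
    list(zip(names, tensors, strict=True))
except ValueError as exc:
    print(exc)  # zip() argument 2 is shorter than argument 1
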
- weight_scale = weight_scale.reshape(tuple()) - weight_zp = weight_zp.reshape(tuple()) + weight_scale = weight_scale.reshape(()) + weight_zp = weight_zp.reshape(()) if weight_scale.shape != weight_zp.shape: raise RuntimeError( f"scale and zero_point must have the same shape but {weight_scale.shape} != {weight_zp.shape}" @@ -339,7 +339,7 @@ def create_weight_matching(float_model_path: str, qdq_model_path: str) -> Dict[s def compute_signal_to_quantization_noice_ratio( - x: Union[Sequence[numpy.ndarray], numpy.ndarray], y: Union[Sequence[numpy.ndarray], numpy.ndarray] + x: Sequence[numpy.ndarray] | numpy.ndarray, y: Sequence[numpy.ndarray] | numpy.ndarray ) -> float: if isinstance(x, numpy.ndarray): xlist = [x] @@ -363,24 +363,24 @@ def compute_signal_to_quantization_noice_ratio( def compute_weight_error( - weights_match: Dict[str, Dict[str, numpy.ndarray]], + weights_match: dict[str, dict[str, numpy.ndarray]], err_func: Callable[[numpy.ndarray, numpy.ndarray], float] = compute_signal_to_quantization_noice_ratio, -) -> Dict[str, float]: - result: Dict[str, float] = {} +) -> dict[str, float]: + result: dict[str, float] = {} for weight_name, weight_match in weights_match.items(): result[weight_name] = err_func(weight_match["float"], weight_match["dequantized"]) return result def compute_activation_error( - activations_match: Dict[str, Dict[str, Sequence[numpy.ndarray]]], + activations_match: dict[str, dict[str, Sequence[numpy.ndarray]]], err_func: Callable[ [Sequence[numpy.ndarray], Sequence[numpy.ndarray]], float ] = compute_signal_to_quantization_noice_ratio, -) -> Dict[str, Dict[str, float]]: - result: Dict[str, Dict[str, float]] = {} +) -> dict[str, dict[str, float]]: + result: dict[str, dict[str, float]] = {} for name, match in activations_match.items(): - err_result: Dict[str, float] = {} + err_result: dict[str, float] = {} err_result["qdq_err"] = err_func(match["pre_qdq"], match["post_qdq"]) float_activation = match["float"] if float_activation: diff --git a/onnxruntime/python/tools/quantization/qdq_quantizer.py b/onnxruntime/python/tools/quantization/qdq_quantizer.py index 5552a4451c542..c911febb3b197 100644 --- a/onnxruntime/python/tools/quantization/qdq_quantizer.py +++ b/onnxruntime/python/tools/quantization/qdq_quantizer.py @@ -842,7 +842,7 @@ def _add_qdq_ops_for_converted_activation( ---> Q1 ---> DQ1 ---> Q2 ---> DQ2 ---> """ - tensor_recv_nodes = set([node.name for node in self.tensor_to_its_receiving_nodes.get(tensor_name, [])]) + tensor_recv_nodes = {node.name for node in self.tensor_to_its_receiving_nodes.get(tensor_name, [])} if ( self.dedicated_qdq_pair @@ -1253,9 +1253,9 @@ def _make_scale_zp_initializers( scale = quant_params["scale"] zero_point_type = quant_params["quant_type"] axis: int | None = quant_params.get("axis") - assert (axis is not None and len(scale.shape) == 1) or ( - axis is None and len(scale.shape) == 0 - ), "Wrong scale/zp shapes" + assert (axis is not None and len(scale.shape) == 1) or (axis is None and len(scale.shape) == 0), ( + "Wrong scale/zp shapes" + ) assert len(scale.shape) == len(zero_point.shape), "Scale and zero-point must have the same rank" zero_point_name = param_name + "_zero_point" + init_name_suffix diff --git a/onnxruntime/python/tools/quantization/quant_utils.py b/onnxruntime/python/tools/quantization/quant_utils.py index 2bf675745d093..7dd8a7cafc846 100644 --- a/onnxruntime/python/tools/quantization/quant_utils.py +++ b/onnxruntime/python/tools/quantization/quant_utils.py @@ -5,6 +5,7 @@ # 
-------------------------------------------------------------------------- from __future__ import annotations +import copy import logging import os import tempfile @@ -196,9 +197,9 @@ def _check_type(*args, zero_point_index=-1): def quantize_nparray(qType, arr, scale, zero_point, low=None, high=None): - assert ( - qType in ONNX_TYPE_TO_NP_TYPE - ), f"Unexpected data type {qType} requested. Only INT8, UINT8, INT16, and UINT16 are supported." + assert qType in ONNX_TYPE_TO_NP_TYPE, ( + f"Unexpected data type {qType} requested. Only INT8, UINT8, INT16, and UINT16 are supported." + ) if qType in ( onnx_proto.TensorProto.FLOAT8E4M3FN, onnx_proto.TensorProto.FLOAT8E4M3FNUZ, @@ -906,11 +907,7 @@ def smooth_distribution(p, eps=0.0001): # raise ValueError('The discrete probability distribution is malformed. All entries are 0.') return None eps1 = eps * float(n_zeros) / float(n_nonzeros) - assert eps1 < 1.0, "n_zeros=%d, n_nonzeros=%d, eps1=%f" % ( - n_zeros, - n_nonzeros, - eps1, - ) + assert eps1 < 1.0, f"n_zeros={n_zeros}, n_nonzeros={n_nonzeros}, eps1={eps1}" hist = p.astype(numpy.float32) hist += eps * is_zeros + (-eps1) * is_nonzeros @@ -921,10 +918,7 @@ def smooth_distribution(p, eps=0.0001): def model_has_external_data(model_path: Path): model = onnx.load(model_path.as_posix(), load_external_data=False) - for intializer in model.graph.initializer: - if external_data_helper.uses_external_data(intializer): - return True - return False + return any(external_data_helper.uses_external_data(intializer) for intializer in model.graph.initializer) def optimize_model(model_path: Path, opt_model_path: Path): @@ -988,8 +982,9 @@ def load_model_with_shape_infer(model_path: Path) -> ModelProto: def save_and_reload_model_with_shape_infer(model: ModelProto) -> ModelProto: with tempfile.TemporaryDirectory(prefix="ort.quant.") as quant_tmp_dir: + model_copy = copy.deepcopy(model) model_path = Path(quant_tmp_dir).joinpath("model.onnx") - onnx.save_model(model, model_path.as_posix(), save_as_external_data=True) + onnx.save_model(model_copy, model_path.as_posix(), save_as_external_data=True) return load_model_with_shape_infer(model_path) diff --git a/onnxruntime/python/tools/quantization/quantize.py b/onnxruntime/python/tools/quantization/quantize.py index 4ffd8b9872982..27221f9445c30 100644 --- a/onnxruntime/python/tools/quantization/quantize.py +++ b/onnxruntime/python/tools/quantization/quantize.py @@ -8,8 +8,9 @@ import copy import logging import tempfile +from collections.abc import Callable from pathlib import Path -from typing import Any, Callable +from typing import Any import onnx diff --git a/onnxruntime/python/tools/quantization/shape_inference.py b/onnxruntime/python/tools/quantization/shape_inference.py index c07007f9d6129..63d34e1167de4 100644 --- a/onnxruntime/python/tools/quantization/shape_inference.py +++ b/onnxruntime/python/tools/quantization/shape_inference.py @@ -9,7 +9,6 @@ import tempfile import traceback from pathlib import Path -from typing import Optional, Union import onnx @@ -23,8 +22,8 @@ def quant_pre_process( - input_model: Optional[Union[str, Path, onnx.ModelProto]] = None, - output_model_path: Optional[Union[str, Path]] = None, + input_model: str | Path | onnx.ModelProto | None = None, + output_model_path: str | Path | None = None, skip_optimization: bool = False, skip_onnx_shape: bool = False, skip_symbolic_shape: bool = False, @@ -34,7 +33,7 @@ def quant_pre_process( verbose: int = 0, save_as_external_data: bool = False, all_tensors_to_one_file: bool = False, - 
external_data_location: Optional[str] = None, + external_data_location: str | None = None, external_data_size_threshold: int = 1024, **deprecated_kwargs, ) -> None: diff --git a/onnxruntime/python/tools/symbolic_shape_infer.py b/onnxruntime/python/tools/symbolic_shape_infer.py index f88011c7a2cf9..b9675d4280e59 100755 --- a/onnxruntime/python/tools/symbolic_shape_infer.py +++ b/onnxruntime/python/tools/symbolic_shape_infer.py @@ -205,6 +205,7 @@ def __init__(self, int_max, auto_merge, guess_output_rank, verbose, prefix=""): "GemmFastGelu": self._infer_GemmFastGelu, "GemmFloat8": self._infer_GemmFloat8, "GroupNorm": self._infer_GroupNorm, + "GroupNormalization": self._infer_GroupNorm, "GroupQueryAttention": self._infer_GroupQueryAttention, "LayerNormalization": self._infer_LayerNormalization, "LongformerAttention": self._infer_LongformerAttention, @@ -258,7 +259,7 @@ def __init__(self, int_max, auto_merge, guess_output_rank, verbose, prefix=""): self.prefix_ = prefix def _add_suggested_merge(self, symbols, apply=False): - assert all([(type(s) is str and s in self.symbolic_dims_) or is_literal(s) for s in symbols]) + assert all((type(s) is str and s in self.symbolic_dims_) or is_literal(s) for s in symbols) symbols = set(symbols) for k, v in self.suggested_merge_.items(): if k in symbols: @@ -328,7 +329,7 @@ def _preprocess(self, in_mp): ) def _merge_symbols(self, dims): - if not all([type(d) is str for d in dims]): + if not all(type(d) is str for d in dims): if self.auto_merge_: unique_dims = list(set(dims)) is_int = [is_literal(d) for d in unique_dims] @@ -347,10 +348,10 @@ def _merge_symbols(self, dims): return dims[0] else: return None - if all([d == dims[0] for d in dims]): + if all(d == dims[0] for d in dims): return dims[0] merged = [self.suggested_merge_.get(d, d) for d in dims] - if all([d == merged[0] for d in merged]): + if all(d == merged[0] for d in merged): assert merged[0] in self.symbolic_dims_ return merged[0] else: @@ -474,6 +475,7 @@ def _onnx_infer_single_node(self, node): "PythonOp", "MultiHeadAttention", "GroupNorm", + "GroupNormalization", "GroupQueryAttention", "SparseAttention", "SkipGroupNorm", @@ -607,7 +609,7 @@ def int_or_float(value, allow_float_values): return int(value) values = [self._try_get_value(node, i) for i in range(len(node.input))] - if all([v is not None for v in values]): + if all(v is not None for v in values): # some shape compute is in floating point, cast to int for sympy for i, v in enumerate(values): if type(v) is not np.ndarray: @@ -647,11 +649,11 @@ def _compute_on_sympy_data(self, node, op_func): else: values = self._get_int_or_float_values(node, broadcast=True) - if all([v is not None for v in values]): + if all(v is not None for v in values): is_list = [isinstance(v, list) for v in values] as_list = any(is_list) if as_list: - self.sympy_data_[node.output[0]] = [op_func(vs) for vs in zip(*values)] + self.sympy_data_[node.output[0]] = [op_func(vs) for vs in zip(*values, strict=False)] else: self.sympy_data_[node.output[0]] = op_func(values) @@ -722,21 +724,21 @@ def _compute_conv_pool_shape(self, node, channels_last=False): dilations = get_attribute(node, "dilations", [1] * rank) strides = get_attribute(node, "strides", [1] * rank) - effective_kernel_shape = [(k - 1) * d + 1 for k, d in zip(kernel_shape, dilations)] + effective_kernel_shape = [(k - 1) * d + 1 for k, d in zip(kernel_shape, dilations, strict=False)] pads = get_attribute(node, "pads") if pads is None: pads = [0] * (2 * rank) auto_pad = get_attribute(node, "auto_pad", 
b"NOTSET").decode("utf-8") if auto_pad != "VALID" and auto_pad != "NOTSET": try: - residual = [sympy.Mod(d, s) for d, s in zip(sympy_shape[-rank:], strides)] + residual = [sympy.Mod(d, s) for d, s in zip(sympy_shape[-rank:], strides, strict=False)] total_pads = [ max(0, (k - s) if r == 0 else (k - r)) - for k, s, r in zip(effective_kernel_shape, strides, residual) + for k, s, r in zip(effective_kernel_shape, strides, residual, strict=False) ] except TypeError: # sympy may throw TypeError: cannot determine truth value of Relational total_pads = [ - max(0, (k - s)) for k, s in zip(effective_kernel_shape, strides) + max(0, (k - s)) for k, s in zip(effective_kernel_shape, strides, strict=False) ] # assuming no residual if sympy throws error elif auto_pad == "VALID": total_pads = [] @@ -744,7 +746,7 @@ def _compute_conv_pool_shape(self, node, channels_last=False): total_pads = [0] * rank else: assert len(pads) == 2 * rank - total_pads = [p1 + p2 for p1, p2 in zip(pads[:rank], pads[rank:])] + total_pads = [p1 + p2 for p1, p2 in zip(pads[:rank], pads[rank:], strict=False)] ceil_mode = get_attribute(node, "ceil_mode", 0) for i in range(rank): @@ -763,7 +765,7 @@ def _compute_conv_pool_shape(self, node, channels_last=False): def _check_merged_dims(self, dims, allow_broadcast=True): if allow_broadcast: dims = [d for d in dims if not (is_literal(d) and int(d) <= 1)] - if not all([d == dims[0] for d in dims]): + if not all(d == dims[0] for d in dims): self._add_suggested_merge(dims, apply=True) def _compute_matmul_shape(self, node, output_dtype=None): @@ -815,7 +817,7 @@ def _fuse_tensor_type(self, node, out_idx, dst_type, src_type): f"{onnx.onnx_pb.TensorProto.DataType.Name(src_tensor_type.elem_type)}" ) if dst_tensor_type.HasField("shape"): - for di, ds in enumerate(zip(dst_tensor_type.shape.dim, src_tensor_type.shape.dim)): + for di, ds in enumerate(zip(dst_tensor_type.shape.dim, src_tensor_type.shape.dim, strict=False)): if ds[0] != ds[1]: # create a new symbolic dimension for node/out_idx/mismatch dim id in dst_tensor_type for tensor_type # for sequence_type, clear the dimension @@ -897,9 +899,9 @@ def _infer_Compress(self, node): # noqa: N802 ) def _infer_Concat(self, node): # noqa: N802 - if any([i in self.sympy_data_ or i in self.initializers_ for i in node.input]): + if any(i in self.sympy_data_ or i in self.initializers_ for i in node.input): values = self._get_int_or_float_values(node) - if all([v is not None for v in values]): + if all(v is not None for v in values): assert get_attribute(node, "axis") == 0 self.sympy_data_[node.output[0]] = [] for i in range(len(node.input)): @@ -921,7 +923,7 @@ def _infer_Concat(self, node): # noqa: N802 if d == axis: continue dims = [self._get_shape(node, i_idx)[d] for i_idx in range(len(node.input)) if self._get_shape(node, i_idx)] - if all([d == dims[0] for d in dims]): + if all(d == dims[0] for d in dims): continue merged = self._merge_symbols(dims) if type(merged) is str: @@ -968,7 +970,7 @@ def _infer_ConstantOfShape(self, node): # noqa: N802 sympy_shape = [sympy_shape] self._update_computed_dims(sympy_shape) # update sympy data if output type is int, and shape is known - if vi.type.tensor_type.elem_type == onnx.TensorProto.INT64 and all([is_literal(x) for x in sympy_shape]): + if vi.type.tensor_type.elem_type == onnx.TensorProto.INT64 and all(is_literal(x) for x in sympy_shape): self.sympy_data_[node.output[0]] = np.ones( [int(x) for x in sympy_shape], dtype=np.int64 ) * numpy_helper.to_array(get_attribute(node, "value", 0)) @@ -1222,7 +1224,7 @@ 
def _infer_Loop(self, node): # noqa: N802 else: si = subgraph.input[i_out + 1] si_shape = get_shape_from_value_info(si) - for di, dims in enumerate(zip(si_shape, so_shape)): + for di, dims in enumerate(zip(si_shape, so_shape, strict=False)): if dims[0] != dims[1]: new_dim = onnx.TensorShapeProto.Dimension() new_dim.dim_param = str(self._new_symbolic_dim_from_output(node, i_out, di)) @@ -1319,7 +1321,8 @@ def _infer_Pad(self, node): # noqa: N802 if pads is not None: assert len(pads) == 2 * rank new_sympy_shape = [ - d + pad_up + pad_down for d, pad_up, pad_down in zip(sympy_shape, pads[:rank], pads[rank:]) + d + pad_up + pad_down + for d, pad_up, pad_down in zip(sympy_shape, pads[:rank], pads[rank:], strict=False) ] self._update_computed_dims(new_sympy_shape) else: @@ -1551,7 +1554,7 @@ def _infer_BatchNormalization(self, node): # noqa: N802 def _infer_Range(self, node): # noqa: N802 vi = self.known_vi_[node.output[0]] input_data = self._get_int_or_float_values(node) - if all([i is not None for i in input_data]): + if all(i is not None for i in input_data): start = as_scalar(input_data[0]) limit = as_scalar(input_data[1]) delta = as_scalar(input_data[2]) @@ -1679,7 +1682,9 @@ def _infer_Resize(self, node): # noqa: N802 if get_opset(self.out_mp_) <= 10: scales = self._try_get_value(node, 1) if scales is not None: - new_sympy_shape = [sympy.simplify(sympy.floor(d * s)) for d, s in zip(input_sympy_shape, scales)] + new_sympy_shape = [ + sympy.simplify(sympy.floor(d * s)) for d, s in zip(input_sympy_shape, scales, strict=False) + ] self._update_computed_dims(new_sympy_shape) vi.CopyFrom( helper.make_tensor_value_info( @@ -1707,7 +1712,7 @@ def _infer_Resize(self, node): # noqa: N802 scales = list(scales) new_sympy_shape = [ sympy.simplify(sympy.floor(d * (end - start) * scale)) - for d, start, end, scale in zip(input_sympy_shape, roi_start, roi_end, scales) + for d, start, end, scale in zip(input_sympy_shape, roi_start, roi_end, scales, strict=False) ] self._update_computed_dims(new_sympy_shape) else: @@ -1814,12 +1819,12 @@ def flatten_min(expr): def replace_min_with_arg(arg_idx): replaced = list(expr.args) - assert isinstance( - replaced[min_pos], sympy.Min - ), f"Expected a sympy.Min() at position {min_pos}, got {replaced[min_pos]}" - assert ( - len(replaced[min_pos].args) == 2 - ), f"Expected a sympy.Min() with exactly 2 arguments, got {replaced[min_pos]}" + assert isinstance(replaced[min_pos], sympy.Min), ( + f"Expected a sympy.Min() at position {min_pos}, got {replaced[min_pos]}" + ) + assert len(replaced[min_pos].args) == 2, ( + f"Expected a sympy.Min() with exactly 2 arguments, got {replaced[min_pos]}" + ) replaced[min_pos] = replaced[min_pos].args[arg_idx] return sympy.Add(*replaced) @@ -1893,7 +1898,7 @@ def handle_negative_index(index, bound): for i in axes: new_sympy_shape[i] = self._new_symbolic_dim_from_output(node, 0, i) else: - for i, s, e, t in zip(axes, starts, ends, steps): + for i, s, e, t in zip(axes, starts, ends, steps, strict=False): e = handle_negative_index(e, new_sympy_shape[i]) # noqa: PLW2901 if is_literal(e): if e >= self.int_max_: @@ -2667,25 +2672,25 @@ def get_prereq(node): # topological sort nodes, note there might be dead nodes so we check if all graph outputs are reached to terminate sorted_nodes = [] sorted_known_vi = {i.name for i in list(self.out_mp_.graph.input) + list(self.out_mp_.graph.initializer)} - if any([o.name in sorted_known_vi for o in self.out_mp_.graph.output]): + if any(o.name in sorted_known_vi for o in self.out_mp_.graph.output): # 
Loop/Scan will have some graph output in graph inputs, so don't do topological sort sorted_nodes = self.out_mp_.graph.node else: - while not all([o.name in sorted_known_vi for o in self.out_mp_.graph.output]): + while not all(o.name in sorted_known_vi for o in self.out_mp_.graph.output): old_sorted_nodes_len = len(sorted_nodes) for node in self.out_mp_.graph.node: if (node.output[0] not in sorted_known_vi) and all( - [i in sorted_known_vi for i in prereq_for_node[node.output[0]] if i] + i in sorted_known_vi for i in prereq_for_node[node.output[0]] if i ): sorted_known_vi.update(node.output) sorted_nodes.append(node) if old_sorted_nodes_len == len(sorted_nodes) and not all( - [o.name in sorted_known_vi for o in self.out_mp_.graph.output] + o.name in sorted_known_vi for o in self.out_mp_.graph.output ): raise Exception("Invalid model with cyclic graph") for node in sorted_nodes: - assert all([i in self.known_vi_ for i in node.input if i]) + assert all(i in self.known_vi_ for i in node.input if i) self._onnx_infer_single_node(node) known_aten_op = False if node.op_type in self.dispatcher_: @@ -2841,7 +2846,7 @@ def get_prereq(node): self._add_suggested_merge( [ s[i] if is_literal(s[i]) else str(s[i]) - for s, i in zip(shapes, dim_idx) + for s, i in zip(shapes, dim_idx, strict=False) if i >= 0 ] ) diff --git a/onnxruntime/python/tools/tensorrt/perf/benchmark.py b/onnxruntime/python/tools/tensorrt/perf/benchmark.py index 4fa5d0c0ea034..2152a66d1f2e7 100644 --- a/onnxruntime/python/tools/tensorrt/perf/benchmark.py +++ b/onnxruntime/python/tools/tensorrt/perf/benchmark.py @@ -607,7 +607,7 @@ def validate(all_ref_outputs, all_outputs, rtol, atol, percent_mismatch): output = outputs[j] # Compare the results with reference outputs - for ref_o, o in zip(ref_output, output): + for ref_o, o in zip(ref_output, output, strict=False): # abs(desired-actual) < rtol * abs(desired) + atol try: np.testing.assert_allclose(ref_o, o, rtol, atol) diff --git a/onnxruntime/python/tools/tensorrt/perf/build/build_image.py b/onnxruntime/python/tools/tensorrt/perf/build/build_image.py index 541dc4978dad1..7f418af06a4ec 100644 --- a/onnxruntime/python/tools/tensorrt/perf/build/build_image.py +++ b/onnxruntime/python/tools/tensorrt/perf/build/build_image.py @@ -6,13 +6,14 @@ Builds an Ubuntu-based Docker image with TensorRT. """ +from __future__ import annotations + import argparse import os import pty import shlex import subprocess import sys -from typing import List, Optional TRT_DOCKER_FILES = { "8.6_cuda11.8_cudnn8": "tools/ci_build/github/linux/docker/Dockerfile.ubuntu_cuda11_8_tensorrt8_6", @@ -23,7 +24,7 @@ } -def run_cmd(cmd: List[str]) -> Optional[int]: +def run_cmd(cmd: list[str]) -> int | None: """ Runs a shell command and returns the process's return code. @@ -38,7 +39,7 @@ def run_cmd(cmd: List[str]) -> Optional[int]: return pty.spawn(cmd) -def get_common_docker_build_args(args: argparse.Namespace) -> List[str]: +def get_common_docker_build_args(args: argparse.Namespace) -> list[str]: """ Returns a list of common 'docker build' command-line arguments/options. 
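build_image.py gains from __future__ import annotations together with the switch to built-in generics. The future import (PEP 563) stores annotations as strings instead of evaluating them at function-definition time, which is what lets list[str] and int | None appear in signatures even on interpreters where those expressions would raise TypeError (before 3.9 and 3.10 respectively). A sketch with a hypothetical stand-in for run_cmd:

from __future__ import annotations  # PEP 563: annotations kept as strings, never evaluated

def run_build(cmd: list[str]) -> int | None:
    # Hypothetical stand-in, not the real run_cmd; only the annotations matter here.
    print(" ".join(cmd))
    return 0

run_build(["docker", "build", "-t", "ort-trt", "."])
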
diff --git a/onnxruntime/python/tools/tensorrt/perf/build/ort_build_latest.py b/onnxruntime/python/tools/tensorrt/perf/build/ort_build_latest.py index c7d4a7836132a..051b0c85521c4 100755 --- a/onnxruntime/python/tools/tensorrt/perf/build/ort_build_latest.py +++ b/onnxruntime/python/tools/tensorrt/perf/build/ort_build_latest.py @@ -44,9 +44,8 @@ def main(): cmake_tar = "cmake-3.28.3-linux-x86_64.tar.gz" if not os.path.exists(cmake_tar): subprocess.run(["wget", "-c", "https://cmake.org/files/v3.28/" + cmake_tar], check=True) - tar = tarfile.open(cmake_tar) - tar.extractall() - tar.close() + with tarfile.open(cmake_tar) as tar: + tar.extractall() os.environ["PATH"] = os.path.join(os.path.abspath("cmake-3.28.3-linux-x86_64"), "bin") + ":" + os.environ["PATH"] os.environ["CUDACXX"] = os.path.join(args.cuda_home, "bin", "nvcc") diff --git a/onnxruntime/python/tools/tensorrt/perf/parse_mem_concurrency_test.py b/onnxruntime/python/tools/tensorrt/perf/parse_mem_concurrency_test.py index 492de13fb42b5..b308066edacad 100644 --- a/onnxruntime/python/tools/tensorrt/perf/parse_mem_concurrency_test.py +++ b/onnxruntime/python/tools/tensorrt/perf/parse_mem_concurrency_test.py @@ -103,7 +103,7 @@ def parse_concurrency_test_log(input_path, output_path): # Parse mem_test log logs = ["valgrind.log", "concurrency_test.log"] csv_paths = ["mem_test.csv", "concurrency_test.csv"] - for log, csv_path in zip(logs, csv_paths): + for log, csv_path in zip(logs, csv_paths, strict=False): if os.path.exists(log): print(f"{identifier}: Parsing {log}") if log == logs[0]: @@ -112,7 +112,9 @@ def parse_concurrency_test_log(input_path, output_path): parse_concurrency_test_log(log, csv_path) # Upload to db - for csv_path, db_table_name in zip(csv_paths, ["ep_valgrind_record", "ep_concurrencytest_record"]): + for csv_path, db_table_name in zip( + csv_paths, ["ep_valgrind_record", "ep_concurrencytest_record"], strict=False + ): if os.path.exists(csv_path): table = pd.read_csv(csv_path) write_table( diff --git a/onnxruntime/python/tools/tensorrt/perf/perf_utils.py b/onnxruntime/python/tools/tensorrt/perf/perf_utils.py index 0d0f7cc48f361..9812c160e9eb4 100644 --- a/onnxruntime/python/tools/tensorrt/perf/perf_utils.py +++ b/onnxruntime/python/tools/tensorrt/perf/perf_utils.py @@ -183,13 +183,13 @@ def parse_single_file(f): print("------First run ops map (START)------") for key, map in provider_op_map_first_run.items(): print(key) - pp.pprint({k: v for k, v in sorted(map.items(), key=lambda item: item[1], reverse=True)}) + pp.pprint(dict(sorted(map.items(), key=lambda item: item[1], reverse=True))) print("------First run ops map (END) ------") print("------Second run ops map (START)------") for key, map in provider_op_map.items(): print(key) - pp.pprint({k: v for k, v in sorted(map.items(), key=lambda item: item[1], reverse=True)}) + pp.pprint(dict(sorted(map.items(), key=lambda item: item[1], reverse=True))) print("------Second run ops map (END) ------") if model_run_flag: diff --git a/onnxruntime/python/tools/tensorrt/perf/setup_scripts/setup_onnx_zoo.py b/onnxruntime/python/tools/tensorrt/perf/setup_scripts/setup_onnx_zoo.py index 0532dd7c72c1c..d2b42df276d2c 100644 --- a/onnxruntime/python/tools/tensorrt/perf/setup_scripts/setup_onnx_zoo.py +++ b/onnxruntime/python/tools/tensorrt/perf/setup_scripts/setup_onnx_zoo.py @@ -17,11 +17,13 @@ def create_model_folder(model): def extract_and_get_files(file_name): model_folder = file_name.replace(".tar.gz", "") + "/" create_model_folder(model_folder) - model_tar = tarfile.open(file_name) 
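Two tarfile cleanups sit side by side here: ort_build_latest.py above wraps the archive in a with block so the file handle is closed even on error, and the setup_onnx_zoo.py hunk that continues below additionally rejects absolute or parent-relative member names before extractall (the classic tar path-traversal issue). A standalone sketch of the same guard, under a hypothetical safe_extract name:

import os
import tarfile

def safe_extract(archive_path, dest):
    # Hypothetical helper mirroring the patch's guard: refuse members that
    # could escape `dest` via an absolute path or a ".." component.
    with tarfile.open(archive_path) as tar:
        for member in tar.getmembers():
            if os.path.isabs(member.name) or ".." in member.name:
                raise ValueError(f"Illegal tar archive entry: {member.name}")
        tar.extractall(dest)
        return sorted(tar.getnames())

Note the patch checks ".." as a substring of the member name, which also rejects harmless names that merely contain two dots; a deliberately conservative choice.
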
- model_tar.extractall(model_folder) - file_list = model_tar.getnames() - file_list.sort() - model_tar.close() + with tarfile.open(file_name) as model_tar: + for member in model_tar.getmembers(): + if os.path.isabs(member.name) or ".." in member.name: + raise ValueError(f"Illegal tar archive entry: {member.name}") + model_tar.extractall(model_folder) + file_list = model_tar.getnames() + file_list.sort() return model_folder, file_list diff --git a/onnxruntime/python/tools/transformers/benchmark.py b/onnxruntime/python/tools/transformers/benchmark.py index 55ce8d752a9d6..54027a5a70d52 100644 --- a/onnxruntime/python/tools/transformers/benchmark.py +++ b/onnxruntime/python/tools/transformers/benchmark.py @@ -13,33 +13,33 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Benchmarking the inference of pretrained transformer models. - PyTorch/TorchScript benchmark is based on https://github.com/huggingface/transformers/blob/master/examples/benchmarks.py. - One difference is that random input_ids is generated in this benchmark. - - For onnxruntime, this script will convert a pretrained model to ONNX, and optimize it when -o parameter is used. - - Example commands: - Export all models to ONNX, optimize and validate them: - python benchmark.py -b 0 -o -v -i 1 2 3 - Run OnnxRuntime on GPU for all models: - python benchmark.py -g - Run OnnxRuntime on GPU for all models with fp32 optimization: - python benchmark.py -g -o - Run OnnxRuntime on GPU with fp16 optimization: - python benchmark.py -g -o -p "fp16" - Run TorchScript on GPU for all models: - python benchmark.py -e torchscript -g - Run TorchScript on GPU for all models with fp16: - python benchmark.py -e torchscript -g -p "fp16" - Run ONNXRuntime and TorchScript on CPU for all models with quantization: - python benchmark.py -e torchscript onnxruntime -p "int8" -o - Run OnnxRuntime with the ROCM provider and graph optimization script: - python benchmark.py -g -m bert-base-cased --provider rocm --optimizer_info by_script --disable_embed_layer_norm - Run OnnxRuntime with bfloat16 fastmath mode kernels on aarch64 platforms with bfloat16 support: - python benchmark.py --enable_arm64_bfloat16_fastmath_mlas_gemm - - It is recommended to use run_benchmark.sh to launch benchmark. +"""Benchmarking the inference of pretrained transformer models. +PyTorch/TorchScript benchmark is based on https://github.com/huggingface/transformers/blob/master/examples/benchmarks.py. +One difference is that random input_ids is generated in this benchmark. + +For onnxruntime, this script will convert a pretrained model to ONNX, and optimize it when -o parameter is used. 
+ +Example commands: + Export all models to ONNX, optimize and validate them: + python benchmark.py -b 0 -o -v -i 1 2 3 + Run OnnxRuntime on GPU for all models: + python benchmark.py -g + Run OnnxRuntime on GPU for all models with fp32 optimization: + python benchmark.py -g -o + Run OnnxRuntime on GPU with fp16 optimization: + python benchmark.py -g -o -p "fp16" + Run TorchScript on GPU for all models: + python benchmark.py -e torchscript -g + Run TorchScript on GPU for all models with fp16: + python benchmark.py -e torchscript -g -p "fp16" + Run ONNXRuntime and TorchScript on CPU for all models with quantization: + python benchmark.py -e torchscript onnxruntime -p "int8" -o + Run OnnxRuntime with the ROCM provider and graph optimization script: + python benchmark.py -g -m bert-base-cased --provider rocm --optimizer_info by_script --disable_embed_layer_norm + Run OnnxRuntime with bfloat16 fastmath mode kernels on aarch64 platforms with bfloat16 support: + python benchmark.py --enable_arm64_bfloat16_fastmath_mlas_gemm + +It is recommended to use run_benchmark.sh to launch benchmark. """ import argparse @@ -117,6 +117,7 @@ def run_onnxruntime( if ( use_gpu and ("CUDAExecutionProvider" not in onnxruntime.get_available_providers()) + and ("MIGraphXExecutionProvider" not in onnxruntime.get_available_providers()) and ("ROCMExecutionProvider" not in onnxruntime.get_available_providers()) and ("DmlExecutionProvider" not in onnxruntime.get_available_providers()) ): @@ -438,9 +439,9 @@ def run_in_graph_mode(*args, **kwargs): return func(*args, **kwargs) if do_eager_mode is True: - assert ( - use_xla is False - ), "Cannot run model in XLA, if `args.eager_mode` is set to `True`. Please set `args.eager_mode=False`." + assert use_xla is False, ( + "Cannot run model in XLA, if `args.eager_mode` is set to `True`. Please set `args.eager_mode=False`." + ) return run_in_eager_mode else: return run_in_graph_mode diff --git a/onnxruntime/python/tools/transformers/benchmark_helper.py b/onnxruntime/python/tools/transformers/benchmark_helper.py index 66f7a63447764..2a210729112d7 100644 --- a/onnxruntime/python/tools/transformers/benchmark_helper.py +++ b/onnxruntime/python/tools/transformers/benchmark_helper.py @@ -16,7 +16,7 @@ from datetime import datetime from enum import Enum from time import sleep -from typing import Any, Dict, List, Optional +from typing import Any import coloredlogs import numpy @@ -167,9 +167,9 @@ def prepare_environment(cache_dir, output_dir, use_gpu, provider=None): if use_gpu: if provider == "dml": - assert ( - "DmlExecutionProvider" in onnxruntime.get_available_providers() - ), "Please install onnxruntime-directml package to test GPU inference." + assert "DmlExecutionProvider" in onnxruntime.get_available_providers(), ( + "Please install onnxruntime-directml package to test GPU inference." 
+ ) else: assert not set(onnxruntime.get_available_providers()).isdisjoint( @@ -405,7 +405,7 @@ def set_random_seed(seed=123): # torch.backends.cudnn.deterministic = True -def get_gpu_info() -> Optional[List[Dict[str, Any]]]: +def get_gpu_info() -> list[dict[str, Any]] | None: from py3nvml.py3nvml import ( NVMLError, nvmlDeviceGetCount, @@ -459,7 +459,7 @@ def measure_cpu_usage(self): return max_usage @abstractmethod - def measure_gpu_usage(self) -> Optional[List[Dict[str, Any]]]: + def measure_gpu_usage(self) -> list[dict[str, Any]] | None: raise NotImplementedError() @@ -467,7 +467,7 @@ class CudaMemoryMonitor(MemoryMonitor): def __init__(self, keep_measuring=True): super().__init__(keep_measuring) - def measure_gpu_usage(self) -> Optional[List[Dict[str, Any]]]: + def measure_gpu_usage(self) -> list[dict[str, Any]] | None: from py3nvml.py3nvml import ( NVMLError, nvmlDeviceGetCount, diff --git a/onnxruntime/python/tools/transformers/bert_perf_test.py b/onnxruntime/python/tools/transformers/bert_perf_test.py index 17c5d3602bb3b..96dce10e4c274 100644 --- a/onnxruntime/python/tools/transformers/bert_perf_test.py +++ b/onnxruntime/python/tools/transformers/bert_perf_test.py @@ -23,7 +23,6 @@ from dataclasses import dataclass from datetime import datetime from pathlib import Path -from typing import Optional import numpy as np import psutil @@ -55,8 +54,8 @@ class ModelSetting: segment_ids_name: str input_mask_name: str opt_level: int - input_tuning_results: Optional[str] - output_tuning_results: Optional[str] + input_tuning_results: str | None + output_tuning_results: str | None mask_type: int @@ -597,7 +596,7 @@ def main(): Path(args.model).parent, "perf_results_{}_B{}_S{}_{}.txt".format( "GPU" if args.use_gpu else "CPU", - "-".join([str(x) for x in sorted(list(batch_size_set))]), + "-".join([str(x) for x in sorted(batch_size_set)]), args.sequence_length, datetime.now().strftime("%Y%m%d-%H%M%S"), ), diff --git a/onnxruntime/python/tools/transformers/bert_test_data.py b/onnxruntime/python/tools/transformers/bert_test_data.py index ccf2497d61342..55a4e4e5824ed 100644 --- a/onnxruntime/python/tools/transformers/bert_test_data.py +++ b/onnxruntime/python/tools/transformers/bert_test_data.py @@ -10,7 +10,6 @@ import os import random from pathlib import Path -from typing import Dict, Optional, Tuple import numpy as np from onnx import ModelProto, TensorProto, numpy_helper @@ -157,7 +156,7 @@ def fake_input_mask_data( return data -def output_test_data(directory: str, inputs: Dict[str, np.ndarray]): +def output_test_data(directory: str, inputs: dict[str, np.ndarray]): """Output input tensors of test data to a directory Args: @@ -305,10 +304,10 @@ def get_graph_input_from_embed_node(onnx_model, embed_node, input_index): def find_bert_inputs( onnx_model: OnnxModel, - input_ids_name: Optional[str] = None, - segment_ids_name: Optional[str] = None, - input_mask_name: Optional[str] = None, -) -> Tuple[Optional[np.ndarray], Optional[np.ndarray], Optional[np.ndarray]]: + input_ids_name: str | None = None, + segment_ids_name: str | None = None, + input_mask_name: str | None = None, +) -> tuple[np.ndarray | None, np.ndarray | None, np.ndarray | None]: """Find graph inputs for BERT model. First, we will deduce inputs from EmbedLayerNormalization node. If not found, we will guess the meaning of graph inputs based on naming. 
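The bert_perf_test.py hunk above drops a redundant copy: sorted() accepts any iterable (a set included) and always returns a fresh list, so sorted(list(batch_size_set)) and sorted(batch_size_set) are equivalent. A sketch with a hypothetical value:

batch_size_set = {8, 1, 4}

label = "-".join(str(x) for x in sorted(batch_size_set))
assert label == "1-4-8"
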
@@ -397,10 +396,10 @@ def find_bert_inputs( def get_bert_inputs( onnx_file: str, - input_ids_name: Optional[str] = None, - segment_ids_name: Optional[str] = None, - input_mask_name: Optional[str] = None, -) -> Tuple[Optional[np.ndarray], Optional[np.ndarray], Optional[np.ndarray]]: + input_ids_name: str | None = None, + segment_ids_name: str | None = None, + input_mask_name: str | None = None, +) -> tuple[np.ndarray | None, np.ndarray | None, np.ndarray | None]: """Find graph inputs for BERT model. First, we will deduce inputs from EmbedLayerNormalization node. If not found, we will guess the meaning of graph inputs based on naming. @@ -531,9 +530,9 @@ def create_and_save_test_data( test_cases: int, seed: int, verbose: bool, - input_ids_name: Optional[str], - segment_ids_name: Optional[str], - input_mask_name: Optional[str], + input_ids_name: str | None, + segment_ids_name: str | None, + input_mask_name: str | None, only_input_tensors: bool, average_sequence_length: int, random_sequence_length: bool, diff --git a/onnxruntime/python/tools/transformers/compare_bert_results.py b/onnxruntime/python/tools/transformers/compare_bert_results.py index 03bcc20d9a5de..bfb19e08b4fe0 100644 --- a/onnxruntime/python/tools/transformers/compare_bert_results.py +++ b/onnxruntime/python/tools/transformers/compare_bert_results.py @@ -37,16 +37,23 @@ def compare(baseline_results, treatment_results, verbose, rtol=1e-1, atol=1e-3): # Validate the output of baseline and treatment, to make sure the results are similar. diff_count = 0 max_abs_diff = 0 + max_diff_percentage = 0 + case_passed = True for test_case_id, results in enumerate(baseline_results): - case_passed = True for i in range(len(results)): treatment_output = treatment_results[test_case_id][i] - abs_diff = np.amax(np.abs(treatment_output - results[i])) + abs_diff_tensor = np.abs(treatment_output - results[i]) + abs_diff = np.amax(abs_diff_tensor) if verbose and abs_diff > atol: print("abs_diff", abs_diff) print("treatment", treatment_output) print("baseline", results[i]) + count_exceeding = np.sum(abs_diff_tensor > atol) + total_elements = abs_diff_tensor.size + percentage_exceeding = (count_exceeding / total_elements) * 100 + max_diff_percentage = max(max_diff_percentage, percentage_exceeding) + max_abs_diff = max(max_abs_diff, abs_diff) if not np.allclose(results[i].tolist(), treatment_output.tolist(), rtol=rtol, atol=atol): if case_passed: @@ -66,6 +73,7 @@ def compare(baseline_results, treatment_results, verbose, rtol=1e-1, atol=1e-3): ) print(f"maximum absolute difference={max_abs_diff}") + print(f"maximum percentage of elements that exceeds atol={atol} is {max_diff_percentage:.3f}%") return max_abs_diff, case_passed diff --git a/onnxruntime/python/tools/transformers/convert_generation.py b/onnxruntime/python/tools/transformers/convert_generation.py index 5a26fedb5287d..68bf9e9e69059 100644 --- a/onnxruntime/python/tools/transformers/convert_generation.py +++ b/onnxruntime/python/tools/transformers/convert_generation.py @@ -48,7 +48,7 @@ import time from enum import Enum from pathlib import Path -from typing import Any, Dict, List, Optional, Union +from typing import Any import numpy as np import onnx @@ -86,7 +86,7 @@ def __str__(self): return self.value -def parse_arguments(argv: Optional[List[str]] = None) -> argparse.Namespace: +def parse_arguments(argv: list[str] | None = None) -> argparse.Namespace: """Parse arguments Args: @@ -883,8 +883,8 @@ def remove_shared_initializers( graph2: GraphProto, shared_prefix: str = "shared_", min_elements: 
int = 1024, - signature_cache1: Optional[dict] = None, - signature_cache2: Optional[dict] = None, + signature_cache1: dict | None = None, + signature_cache2: dict | None = None, ): """Remove initializers with same value from two graphs. @@ -1005,7 +1005,7 @@ def get_shared_initializers(encoder_model: ModelProto, decoder_model: ModelProto def move_initializers( graph: GraphProto, min_elements: int = 1024, -) -> List[TensorProto]: +) -> list[TensorProto]: """Remove initializers of a graph, when they have number of elements larger than a threshold. Args: @@ -2585,13 +2585,13 @@ def convert_generation_model(args: argparse.Namespace, generation_type: Generati def test_torch_performance( args: argparse.Namespace, - model: Union[GPT2LMHeadModel, T5ForConditionalGeneration], + model: GPT2LMHeadModel | T5ForConditionalGeneration, input_ids: torch.Tensor, attention_mask: torch.Tensor, eos_token_id: int, pad_token_id: int, - bad_words_ids: List[List[int]], -) -> Dict[str, Any]: + bad_words_ids: list[list[int]], +) -> dict[str, Any]: """Test PyTorch performance of text generation. Args: @@ -2661,7 +2661,7 @@ def create_attention_mask(input_ids, pad_token_id): return attention_mask -def test_gpt_model(args: argparse.Namespace, sentences: Optional[List[str]] = None, is_greedy: bool = False): +def test_gpt_model(args: argparse.Namespace, sentences: list[str] | None = None, is_greedy: bool = False): """Test GPT-2 model Args: @@ -2872,7 +2872,7 @@ def test_gpt_model(args: argparse.Namespace, sentences: Optional[List[str]] = No return output -def test_t5_model(args: argparse.Namespace, sentences: Optional[List[str]] = None): +def test_t5_model(args: argparse.Namespace, sentences: list[str] | None = None): """Test T5 or MT5 model Args: @@ -3061,7 +3061,7 @@ def test_t5_model(args: argparse.Namespace, sentences: Optional[List[str]] = Non return output -def main(argv: Optional[List[str]] = None, sentences: Optional[List[str]] = None): +def main(argv: list[str] | None = None, sentences: list[str] | None = None): """Main entry function Args: diff --git a/onnxruntime/python/tools/transformers/convert_to_packing_mode.py b/onnxruntime/python/tools/transformers/convert_to_packing_mode.py index e854312cae826..9a6388b3f350d 100644 --- a/onnxruntime/python/tools/transformers/convert_to_packing_mode.py +++ b/onnxruntime/python/tools/transformers/convert_to_packing_mode.py @@ -6,7 +6,6 @@ import argparse import logging import os -from typing import List, Union import coloredlogs from constants import ( @@ -26,15 +25,15 @@ class PackingAttentionBase: def __init__(self, model: OnnxModel, attention_op_type: str): self.model: OnnxModel = model - self.nodes_to_remove: List = [] - self.nodes_to_add: List = [] + self.nodes_to_remove: list = [] + self.nodes_to_add: list = [] self.prune_graph: bool = False self.node_name_to_graph_name: dict = {} self.this_graph_name: str = self.model.model.graph.name self.attention_op_type = attention_op_type self.attention_nodes = self.model.get_nodes_by_op_type(attention_op_type) - def _try_getting_attention_mask(self) -> Union[str, None]: + def _try_getting_attention_mask(self) -> str | None: mask_index = ( AttentionInputIDs.MASK_INDEX if self.attention_op_type == Operators.ATTENTION @@ -54,13 +53,13 @@ def _try_getting_attention_mask(self) -> Union[str, None]: return attention_mask - def _try_getting_first_attention(self) -> Union[NodeProto, None]: + def _try_getting_first_attention(self) -> NodeProto | None: if len(self.attention_nodes) <= 0: return None return self.attention_nodes[0] - def 
_try_getting_last_layernorm(self) -> Union[NodeProto, None]: + def _try_getting_last_layernorm(self) -> NodeProto | None: last_layernorm_node = None for node in self.model.nodes(): if node.op_type == Operators.LAYERNORM or node.op_type == Operators.SKIPLAYERNORM: @@ -70,7 +69,7 @@ def _try_getting_last_layernorm(self) -> Union[NodeProto, None]: def _are_attentions_supported(self) -> bool: raise NotImplementedError() - def _insert_removepadding_node(self, inputs: List[str], outputs: List[str]) -> None: + def _insert_removepadding_node(self, inputs: list[str], outputs: list[str]) -> None: new_node = helper.make_node( Operators.REMOVEPADDING, inputs=inputs, @@ -82,7 +81,7 @@ def _insert_removepadding_node(self, inputs: List[str], outputs: List[str]) -> N self.nodes_to_add.append(new_node) self.node_name_to_graph_name[new_node.name] = self.this_graph_name - def _insert_restorepadding_node(self, inputs: List[str], outputs: List[str]) -> None: + def _insert_restorepadding_node(self, inputs: list[str], outputs: list[str]) -> None: new_node = helper.make_node( Operators.RESTOREPADDING, inputs=inputs, @@ -97,7 +96,7 @@ def _insert_restorepadding_node(self, inputs: List[str], outputs: List[str]) -> def _replace_attention_with_packing_attention(self, token_offset: str, cumulative_sequence_length: str) -> None: raise NotImplementedError() - def _get_input_to_remove_padding(self, first_attention_node) -> Union[str, None]: + def _get_input_to_remove_padding(self, first_attention_node) -> str | None: if self.attention_op_type == Operators.ATTENTION: return first_attention_node.input[AttentionInputIDs.INPUT] return None @@ -306,7 +305,7 @@ def _replace_attention_with_packing_attention(self, token_offset: str, cumulativ logger.info("Converted %d MultiHeadAttention nodes to PackedMultiHeadAttention.", len(self.attention_nodes)) logger.info("Converted %d GatedRelativePositionBias nodes to packing mode.", gated_relative_pos_bias_count) - def _get_input_to_remove_padding(self, first_attention_node) -> Union[str, None]: + def _get_input_to_remove_padding(self, first_attention_node) -> str | None: # When there are query, key and value inputs, we need to find the first input of the parent MatMul node. matmul = self.model.get_parent(first_attention_node, 0) if matmul and matmul.op_type == "MatMul": diff --git a/onnxruntime/python/tools/transformers/float16.py b/onnxruntime/python/tools/transformers/float16.py index 74adc951c4aa3..349f5bb51fe47 100644 --- a/onnxruntime/python/tools/transformers/float16.py +++ b/onnxruntime/python/tools/transformers/float16.py @@ -16,7 +16,6 @@ import logging import os import tempfile -from typing import Dict import numpy as np import onnx @@ -201,9 +200,9 @@ def convert_float_to_float16( Returns: ModelProto: converted model. """ - assert ( - min_positive_val >= 5.96e-08 - ), "invalid min_positive_val. smallest positive float16 value: subnormal 5.96e-08, and normalized 6.104e-05" + assert min_positive_val >= 5.96e-08, ( + "invalid min_positive_val. smallest positive float16 value: subnormal 5.96e-08, and normalized 6.104e-05" + ) assert max_finite_val <= float(np.finfo(np.float16).max), "invalid max_finite_val. 
largest float16 value: 65504" force_fp16_inputs_dict = {} if force_fp16_inputs is None else force_fp16_inputs @@ -304,7 +303,7 @@ def convert_float_to_float16( value_info_list.append(new_value_info) io_casts.add(node_name) - fp32_initializers: Dict[str, InitializerTracker] = {} + fp32_initializers: dict[str, InitializerTracker] = {} while queue: next_level = [] for q in queue: diff --git a/onnxruntime/python/tools/transformers/fusion_attention.py b/onnxruntime/python/tools/transformers/fusion_attention.py index 030708783bb61..73fde4fdc774e 100644 --- a/onnxruntime/python/tools/transformers/fusion_attention.py +++ b/onnxruntime/python/tools/transformers/fusion_attention.py @@ -3,7 +3,6 @@ # Licensed under the MIT License. # -------------------------------------------------------------------------- from logging import getLogger -from typing import List, Optional, Tuple, Union import numpy as np from fusion_base import Fusion @@ -42,7 +41,7 @@ def get_first_mask(self): assert len(self.mask_indice) > 0 return next(iter(self.mask_indice)) - def process_mask(self, mask_2d: str) -> Optional[str]: + def process_mask(self, mask_2d: str) -> str | None: if self.mask_format == AttentionMaskFormat.NoMask: return None @@ -111,10 +110,10 @@ def __init__( model: OnnxModel, hidden_size: int, num_heads: int, - attention_mask: Optional[AttentionMask] = None, + attention_mask: AttentionMask | None = None, use_multi_head_attention: bool = False, disable_multi_head_attention_bias: bool = False, - search_op_types: List[str] = ["SkipLayerNormalization", "LayerNormalization"], # noqa: B006 + search_op_types: list[str] = ["SkipLayerNormalization", "LayerNormalization"], # noqa: B006 ): attention_op_name = "MultiHeadAttention" if use_multi_head_attention else "Attention" super().__init__(model, attention_op_name, search_op_types) @@ -132,7 +131,7 @@ def __init__( self.shape_infer = None self.shape_infer_done = True - def get_num_heads_and_hidden_size_from_concat(self, concat: NodeProto) -> Tuple[int, int]: + def get_num_heads_and_hidden_size_from_concat(self, concat: NodeProto) -> tuple[int, int]: """ Detect num_heads and hidden_size from Concat node in the following subgraph: @@ -163,7 +162,7 @@ def get_num_heads_and_hidden_size_from_concat(self, concat: NodeProto) -> Tuple[ return self.num_heads, self.hidden_size - def get_num_heads_and_hidden_size(self, reshape_q: NodeProto) -> Tuple[int, int]: + def get_num_heads_and_hidden_size(self, reshape_q: NodeProto) -> tuple[int, int]: """Detect num_heads and hidden_size from a reshape node. 
Args: @@ -355,52 +354,13 @@ def split_kv(self, present_k_name: str, present_v_name: str, kv_node: str): self.node_name_to_graph_name[gather_k_name] = self.this_graph_name self.node_name_to_graph_name[gather_v_name] = self.this_graph_name - def transpose_kv(self, past_k: str, past_v: str): - """Transpose past_k and past_v from (B,N,P,H) to (B,P,N,H) - - Args: - past_k (str): name of past K value of shape (B,N,P,H) - past_v (str): name of past V value of shape (B,N,P,H) - - Returns: - past_k_transpose (str): name of past K value of shape (B,P,N,H) - past_v_transpose (str): name of past V value of shape (B,P,N,H) - """ - past_k_transpose = (past_k + "_transposed").replace(".", "_") - past_v_transpose = (past_v + "_transposed").replace(".", "_") - transpose_k_name = self.model.create_node_name("Transpose") - transpose_v_name = self.model.create_node_name("Transpose") - - transpose_k = helper.make_node( - "Transpose", - inputs=[past_k], - outputs=[past_k_transpose], - name=transpose_k_name, - perm=[0, 2, 1, 3], - ) - transpose_v = helper.make_node( - "Transpose", - inputs=[past_v], - outputs=[past_v_transpose], - name=transpose_v_name, - perm=[0, 2, 1, 3], - ) - - # Add reshape nodes to graph - self.nodes_to_add.append(transpose_k) - self.nodes_to_add.append(transpose_v) - self.node_name_to_graph_name[transpose_k_name] = self.this_graph_name - self.node_name_to_graph_name[transpose_v_name] = self.this_graph_name - - return past_k_transpose, past_v_transpose - def create_combined_qkv_bias( self, q_add: NodeProto, - k_add: Union[NodeProto, None], - v_add: Union[NodeProto, None], + k_add: NodeProto | None, + v_add: NodeProto | None, name_prefix: str, - ) -> Union[NodeProto, None]: + ) -> NodeProto | None: q_bias = self.model.get_initializer(q_add.input[1]) or self.model.get_initializer(q_add.input[0]) qb = NumpyHelper.to_array(q_bias) kb = np.zeros_like(qb) @@ -430,9 +390,9 @@ def create_packed_qkv_matmul_node( k_matmul: NodeProto, v_matmul: NodeProto, q_add: NodeProto, - k_add: Union[NodeProto, None], - v_add: Union[NodeProto, None], - ) -> Tuple[NodeProto, NodeProto, NodeProto]: + k_add: NodeProto | None, + v_add: NodeProto | None, + ) -> tuple[NodeProto, NodeProto, NodeProto]: """Create packed QKV MatMul node before MultiHeadAttention node. This is for the scenario where an Attention node should be created but cannot be created because past_key and past_value are separate inputs and not one concatenated input. @@ -571,11 +531,11 @@ def create_packed_qkv_matmul_node( def create_multihead_attention_node( self, q_matmul: NodeProto, - k_matmul: Union[NodeProto, str, None], - v_matmul: Union[NodeProto, str, None], + k_matmul: NodeProto | str | None, + v_matmul: NodeProto | str | None, q_add: NodeProto, - k_add: Union[NodeProto, None], - v_add: Union[NodeProto, None], + k_add: NodeProto | None, + v_add: NodeProto | None, num_heads: int, hidden_size: int, output: str, @@ -586,7 +546,7 @@ def create_multihead_attention_node( present_k: str = "", present_v: str = "", packed_qkv: bool = False, - ) -> Union[NodeProto, None]: + ) -> NodeProto | None: """Create a MultiHeadAttention node. 
Args: @@ -619,7 +579,7 @@ def create_multihead_attention_node( logger.debug("input hidden size %d is not a multiple of num of heads %d", hidden_size, num_heads) return None - graph_input_names = set([node.name for node in self.model.graph().input]) + graph_input_names = {node.name for node in self.model.graph().input} mha_node_name = self.model.create_node_name("Attention") # Add initial Q/K/V inputs for MHA @@ -686,7 +646,7 @@ def create_multihead_attention_node( def create_attention_node( self, - mask_index: Optional[str], + mask_index: str | None, q_matmul: NodeProto, k_matmul: NodeProto, v_matmul: NodeProto, @@ -702,9 +662,9 @@ def create_attention_node( past_v: str = "", present_k: str = "", present_v: str = "", - scale: Optional[float] = None, + scale: float | None = None, causal: bool = False, - ) -> Union[NodeProto, None]: + ) -> NodeProto | None: """Create an Attention node. Args: @@ -801,7 +761,7 @@ def create_attention_node( qkv_weight_dim = 3 * qw_out_size qkv_bias_dim = 0 - qkv_bias: Optional[np.ndarray] = None + qkv_bias: np.ndarray | None = None if has_bias: qb = NumpyHelper.to_array(q_bias) kb = NumpyHelper.to_array(k_bias) diff --git a/onnxruntime/python/tools/transformers/fusion_attention_clip.py b/onnxruntime/python/tools/transformers/fusion_attention_clip.py index 16e2c36bfd092..a4a7a5c8c1890 100644 --- a/onnxruntime/python/tools/transformers/fusion_attention_clip.py +++ b/onnxruntime/python/tools/transformers/fusion_attention_clip.py @@ -3,7 +3,6 @@ # Licensed under the MIT License. # -------------------------------------------------------------------------- from logging import getLogger -from typing import Tuple from fusion_attention import AttentionMask, FusionAttention from fusion_options import AttentionMaskFormat @@ -36,7 +35,7 @@ def __init__( search_op_types=["SkipLayerNormalization"], ) - def get_num_heads_and_hidden_size(self, reshape_q: NodeProto) -> Tuple[int, int]: + def get_num_heads_and_hidden_size(self, reshape_q: NodeProto) -> tuple[int, int]: """Detect num_heads and hidden_size for ONNX model from MiDaS Args: reshape_q (NodeProto): reshape node for q diff --git a/onnxruntime/python/tools/transformers/fusion_attention_sam2.py b/onnxruntime/python/tools/transformers/fusion_attention_sam2.py index ce7ddd3c1050e..f66d7d12d1e5f 100644 --- a/onnxruntime/python/tools/transformers/fusion_attention_sam2.py +++ b/onnxruntime/python/tools/transformers/fusion_attention_sam2.py @@ -3,7 +3,6 @@ # Licensed under the MIT License. # -------------------------------------------------------------------------- from logging import getLogger -from typing import Tuple, Union import numpy as np from fusion_base import Fusion @@ -97,7 +96,7 @@ def get_hidden_size(self, layernorm_node): def get_num_heads_and_hidden_size( self, reshape_q: NodeProto, layernorm_node: NodeProto, is_encoder: bool = False - ) -> Tuple[int, int]: + ) -> tuple[int, int]: """Detect num_heads and hidden_size. Args: @@ -142,7 +141,7 @@ def create_attention_node( num_heads: int, hidden_size: int, output: str, - ) -> Union[NodeProto, None]: + ) -> NodeProto | None: """Create an Attention node. Args: diff --git a/onnxruntime/python/tools/transformers/fusion_attention_unet.py b/onnxruntime/python/tools/transformers/fusion_attention_unet.py index 048c13cdb1e2c..1bdf4f24f3621 100644 --- a/onnxruntime/python/tools/transformers/fusion_attention_unet.py +++ b/onnxruntime/python/tools/transformers/fusion_attention_unet.py @@ -3,7 +3,6 @@ # Licensed under the MIT License. 
# -------------------------------------------------------------------------- from logging import getLogger -from typing import Tuple, Union import numpy as np from fusion_base import Fusion @@ -91,7 +90,7 @@ def get_hidden_size(self, layernorm_node): def get_num_heads_and_hidden_size( self, reshape_q: NodeProto, layernorm_node: NodeProto, is_torch2: bool = False - ) -> Tuple[int, int]: + ) -> tuple[int, int]: """Detect num_heads and hidden_size. Args: @@ -132,7 +131,7 @@ def create_attention_node( hidden_size: int, input: str, output: str, - ) -> Union[NodeProto, None]: + ) -> NodeProto | None: """Create an Attention node. Args: @@ -373,7 +372,9 @@ def create_attention_node( else "MultiHeadAttention ({})".format( "self attention with packed qkv" if self.enable_packed_qkv - else "cross attention with packed kv" if self.enable_packed_kv else "cross attention" + else "cross attention with packed kv" + if self.enable_packed_kv + else "cross attention" ) ) self.increase_counter(counter_name) @@ -388,7 +389,7 @@ def create_attention_node_lora( hidden_size: int, input: str, output: str, - ) -> Union[NodeProto, None]: + ) -> NodeProto | None: """Create an Attention node. Args: @@ -841,7 +842,9 @@ def create_attention_node_lora( else "MultiHeadAttention ({})".format( "self attention with packed qkv" if self.enable_packed_qkv - else "cross attention with packed kv" if self.enable_packed_kv else "cross attention" + else "cross attention with packed kv" + if self.enable_packed_kv + else "cross attention" ) ) self.increase_counter(counter_name) diff --git a/onnxruntime/python/tools/transformers/fusion_attention_vae.py b/onnxruntime/python/tools/transformers/fusion_attention_vae.py index 151c04f9334fe..2b57fa2c418cf 100644 --- a/onnxruntime/python/tools/transformers/fusion_attention_vae.py +++ b/onnxruntime/python/tools/transformers/fusion_attention_vae.py @@ -3,7 +3,6 @@ # Licensed under the MIT License. # -------------------------------------------------------------------------- from logging import getLogger -from typing import Tuple, Union import numpy as np from fusion_base import Fusion @@ -27,7 +26,7 @@ def __init__(self, model: OnnxModel, hidden_size: int, num_heads: int): self.num_heads_warning = True self.hidden_size_warning = True - def get_num_heads_and_hidden_size(self, reshape_q: NodeProto, add_q: NodeProto) -> Tuple[int, int]: + def get_num_heads_and_hidden_size(self, reshape_q: NodeProto, add_q: NodeProto) -> tuple[int, int]: """Detect num_heads and hidden_size from a reshape node. Args: @@ -80,7 +79,7 @@ def create_attention_node( hidden_size: int, input_name: str, output_name: str, - ) -> Union[NodeProto, None]: + ) -> NodeProto | None: """Create an Attention node. 
Args: diff --git a/onnxruntime/python/tools/transformers/fusion_bart_attention.py b/onnxruntime/python/tools/transformers/fusion_bart_attention.py index 8c334b83abfeb..69445f93e683d 100644 --- a/onnxruntime/python/tools/transformers/fusion_bart_attention.py +++ b/onnxruntime/python/tools/transformers/fusion_bart_attention.py @@ -201,8 +201,8 @@ def fuse(self, normalize_node, input_name_to_nodes, output_name_to_node): root_input = output break - graph_input_names = set([node.name for node in self.model.graph().input]) - graph_output_names = set([node.name for node in self.model.graph().output]) + graph_input_names = {node.name for node in self.model.graph().input} + graph_output_names = {node.name for node in self.model.graph().output} v_nodes = self.model.match_parent_path( matmul_qkv, diff --git a/onnxruntime/python/tools/transformers/fusion_base.py b/onnxruntime/python/tools/transformers/fusion_base.py index 67f4f0b55cff8..a923e14c493f4 100644 --- a/onnxruntime/python/tools/transformers/fusion_base.py +++ b/onnxruntime/python/tools/transformers/fusion_base.py @@ -3,8 +3,9 @@ # Licensed under the MIT License. # -------------------------------------------------------------------------- from collections import defaultdict +from collections.abc import Sequence from logging import getLogger -from typing import Any, Dict, List, Optional, Sequence, Union +from typing import Any import numpy as np from onnx import NodeProto, helper @@ -22,18 +23,18 @@ def __init__( self, model: OnnxModel, fused_op_type: str, - search_op_types: Union[str, List[str]], + search_op_types: str | list[str], description: str = "", ): - self.search_op_types: List[str] = [search_op_types] if isinstance(search_op_types, str) else search_op_types + self.search_op_types: list[str] = [search_op_types] if isinstance(search_op_types, str) else search_op_types self.fused_op_type: str = fused_op_type self.description: str = f"{fused_op_type}({description})" if description else fused_op_type self.model: OnnxModel = model - self.nodes_to_remove: List = [] - self.nodes_to_add: List = [] + self.nodes_to_remove: list = [] + self.nodes_to_add: list = [] self.prune_graph: bool = False self.node_name_to_graph_name: dict = {} - self.this_graph_name: Optional[str] = None + self.this_graph_name: str | None = None # It is optional that subclass updates fused_count since we will also check nodes_to_add to get counter. self.fused_count: defaultdict = defaultdict(int) @@ -46,8 +47,8 @@ def increase_counter(self, fused_op_name: str): def fuse( self, node: NodeProto, - input_name_to_nodes: Dict[str, List[NodeProto]], - output_name_to_node: Dict[str, NodeProto], + input_name_to_nodes: dict[str, list[NodeProto]], + output_name_to_node: dict[str, NodeProto], ): """Interface for fusion that starts from a node""" raise NotImplementedError @@ -114,7 +115,7 @@ def add_initializer(self, name: str, data_type: int, dims: Sequence[int], vals: self.model.add_initializer(tensor, self.this_graph_name) return tensor - def add_nodes_to_remove(self, nodes: List[NodeProto]): + def add_nodes_to_remove(self, nodes: list[NodeProto]): # Some nodes are shared between paths (e.g. rotary embedding nodes in the Q and K paths). # When path A is fused, its shared nodes are added to `self.nodes_to_remove`. But when path B # is fused, its shared nodes are also added to `self.nodes_to_remove`. 
When the nodes are @@ -131,7 +132,7 @@ def add_nodes_to_remove(self, nodes: List[NodeProto]): if node not in self.nodes_to_remove: self.nodes_to_remove.append(node) - def add_nodes_to_remove_with_nodes_to_keep(self, nodes: List[NodeProto], nodes_to_keep: List[NodeProto]): + def add_nodes_to_remove_with_nodes_to_keep(self, nodes: list[NodeProto], nodes_to_keep: list[NodeProto]): for node in nodes: if node not in self.nodes_to_remove and node not in nodes_to_keep: self.nodes_to_remove.append(node) diff --git a/onnxruntime/python/tools/transformers/fusion_bias_add.py b/onnxruntime/python/tools/transformers/fusion_bias_add.py index 8489af0940983..1cb4edad04ffe 100644 --- a/onnxruntime/python/tools/transformers/fusion_bias_add.py +++ b/onnxruntime/python/tools/transformers/fusion_bias_add.py @@ -3,7 +3,6 @@ # Licensed under the MIT License. # -------------------------------------------------------------------------- from logging import getLogger -from typing import Dict from fusion_base import Fusion from numpy import ndarray @@ -17,7 +16,7 @@ class FusionBiasAdd(Fusion): def __init__(self, model: OnnxModel): super().__init__(model, "BiasAdd", "Add") - def fuse(self, add_node, input_name_to_nodes: Dict, output_name_to_node: Dict): + def fuse(self, add_node, input_name_to_nodes: dict, output_name_to_node: dict): """ Fuse Add bias and Add skip connection into BiasAdd """ diff --git a/onnxruntime/python/tools/transformers/fusion_biassplitgelu.py b/onnxruntime/python/tools/transformers/fusion_biassplitgelu.py index 67a7c0fb9ceb3..1118809fdf6d3 100644 --- a/onnxruntime/python/tools/transformers/fusion_biassplitgelu.py +++ b/onnxruntime/python/tools/transformers/fusion_biassplitgelu.py @@ -3,7 +3,6 @@ # Licensed under the MIT License. # -------------------------------------------------------------------------- from logging import getLogger -from typing import Dict from fusion_base import Fusion from onnx import helper @@ -16,7 +15,7 @@ class FusionBiasSplitGelu(Fusion): def __init__(self, model: OnnxModel): super().__init__(model, "BiasSplitGelu", "Gelu") - def fuse(self, gelu_node, input_name_to_nodes: Dict, output_name_to_node: Dict): + def fuse(self, gelu_node, input_name_to_nodes: dict, output_name_to_node: dict): """ [root] --->Add --------------------> Slice ---------------> Mul --> | ^ ^ diff --git a/onnxruntime/python/tools/transformers/fusion_embedlayer.py b/onnxruntime/python/tools/transformers/fusion_embedlayer.py index 70ff57f0626e1..66ef06097aa58 100644 --- a/onnxruntime/python/tools/transformers/fusion_embedlayer.py +++ b/onnxruntime/python/tools/transformers/fusion_embedlayer.py @@ -4,7 +4,6 @@ # -------------------------------------------------------------------------- from logging import getLogger -from typing import Dict, List, Optional, Tuple, Union from fusion_base import Fusion from fusion_utils import FusionUtils @@ -35,7 +34,7 @@ def __init__(self, model: OnnxModel, description: str = "no mask"): self.attention = None self.embed_node = None - def match_two_gather(self, add: NodeProto) -> Union[None, Tuple[NodeProto, NodeProto]]: + def match_two_gather(self, add: NodeProto) -> None | tuple[NodeProto, NodeProto]: gather_0_path = self.model.match_parent_path(add, ["Gather"], [0]) if gather_0_path is None: return None @@ -49,7 +48,7 @@ def match_two_gather(self, add: NodeProto) -> Union[None, Tuple[NodeProto, NodeP def check_attention_subgraph( self, layernorm: NodeProto, - input_name_to_nodes: Dict[str, List[NodeProto]], + input_name_to_nodes: dict[str, list[NodeProto]], 
is_distil_bert: bool, ) -> bool: """Check that LayerNormalization has a child of Attention node or subgraph like Attention. @@ -399,7 +398,7 @@ def check_embedding(self, word_embedding_gather, segment_embedding_gather, posit return True - def cast_to_int32(self, input_name: str) -> Tuple[str, Union[None, NodeProto]]: + def cast_to_int32(self, input_name: str) -> tuple[str, None | NodeProto]: """Cast a graph input or node input to int32. Args: @@ -428,8 +427,8 @@ def create_fused_node( layernorm: NodeProto, word_embedding_gather: NodeProto, position_embedding_gather: NodeProto, - segment_embedding_gather: Union[None, NodeProto], - position_ids: Optional[str] = None, + segment_embedding_gather: None | NodeProto, + position_ids: str | None = None, embedding_sum_output=False, embedding_sum_name=None, ): diff --git a/onnxruntime/python/tools/transformers/fusion_fastgelu.py b/onnxruntime/python/tools/transformers/fusion_fastgelu.py index a9f46585faad7..99f716193adb6 100644 --- a/onnxruntime/python/tools/transformers/fusion_fastgelu.py +++ b/onnxruntime/python/tools/transformers/fusion_fastgelu.py @@ -3,7 +3,6 @@ # Licensed under the MIT License. # -------------------------------------------------------------------------- from logging import getLogger -from typing import Dict, Optional from fusion_base import Fusion from onnx import helper @@ -16,7 +15,7 @@ class FusionFastGelu(Fusion): def __init__(self, model: OnnxModel): super().__init__(model, "FastGelu", "Tanh") - def fuse(self, tanh_node, input_name_to_nodes: Dict, output_name_to_node: Dict): + def fuse(self, tanh_node, input_name_to_nodes: dict, output_name_to_node: dict): if self.fuse_1(tanh_node, input_name_to_nodes, output_name_to_node): return @@ -26,7 +25,10 @@ def fuse(self, tanh_node, input_name_to_nodes: Dict, output_name_to_node: Dict): if self.fuse_3(tanh_node, input_name_to_nodes, output_name_to_node): return - def fuse_1(self, tanh_node, input_name_to_nodes, output_name_to_node) -> Optional[bool]: + if self.fuse_4(tanh_node, input_name_to_nodes, output_name_to_node): + return + + def fuse_1(self, tanh_node, input_name_to_nodes, output_name_to_node) -> bool | None: """ Fuse Gelu with tanh into one node: +---------------------------+ @@ -134,7 +136,7 @@ def fuse_1(self, tanh_node, input_name_to_nodes, output_name_to_node) -> Optiona self.node_name_to_graph_name[fused_node.name] = self.this_graph_name return True - def fuse_2(self, tanh_node, input_name_to_nodes: Dict, output_name_to_node: Dict) -> Optional[bool]: + def fuse_2(self, tanh_node, input_name_to_nodes: dict, output_name_to_node: dict) -> bool | None: """ This pattern is from Tensorflow model. 
Fuse Gelu with tanh into one node: @@ -243,7 +245,7 @@ def fuse_2(self, tanh_node, input_name_to_nodes: Dict, output_name_to_node: Dict self.node_name_to_graph_name[fused_node.name] = self.this_graph_name return True - def fuse_3(self, tanh_node, input_name_to_nodes: Dict, output_name_to_node: Dict) -> Optional[bool]: + def fuse_3(self, tanh_node, input_name_to_nodes: dict, output_name_to_node: dict) -> bool | None: """ OpenAI's gelu implementation, also used in Megatron: Gelu(x) = x * 0.5 * (1.0 + torch.tanh(0.79788456 * x * (1.0 + 0.044715 * x * x))) @@ -358,3 +360,122 @@ def fuse_3(self, tanh_node, input_name_to_nodes: Dict, output_name_to_node: Dict self.nodes_to_add.append(fused_node) self.node_name_to_graph_name[fused_node.name] = self.this_graph_name return True + + def fuse_4(self, tanh_node, input_name_to_nodes: dict, output_name_to_node: dict) -> bool | None: + """ + This pattern is from stable diffusion 3.5 model. + Fuse Gelu with tanh into one node: + +-----------------+------------------+ + | | | + | v v + [root] ==> Mul --> Mul --> Mul -----> Add --> Mul --> Tanh --> Add -----> Mul --> Mul --> + | (A=0.0447) (A=0.7978) (A=1) ^ (A=0.5) + | | + +-------------------------------------------------------------------------+ + Note that constant input for Add and Mul could be first or second input. + """ + if tanh_node.output[0] not in input_name_to_nodes: + return + + children = input_name_to_nodes[tanh_node.output[0]] + if len(children) != 1 or children[0].op_type != "Add": + return + add_after_tanh = children[0] + + if not self.model.has_constant_input(add_after_tanh, 1.0): + return + + if add_after_tanh.output[0] not in input_name_to_nodes: + return + children = input_name_to_nodes[add_after_tanh.output[0]] + if len(children) != 1 or children[0].op_type != "Mul": + return + mul_after_tanh = children[0] + + if mul_after_tanh.output[0] not in input_name_to_nodes: + return + children = input_name_to_nodes[mul_after_tanh.output[0]] + if len(children) != 1 or children[0].op_type != "Mul": + return + mul_half = children[0] + if not self.model.has_constant_input(mul_half, 0.5): + return + + root_input = mul_after_tanh.input[0 if mul_after_tanh.input[1] == add_after_tanh.output[0] else 1] + + mul_before_tanh = self.model.match_parent(tanh_node, "Mul", 0, output_name_to_node) + if mul_before_tanh is None: + return + + i = self.model.find_constant_input(mul_before_tanh, 0.7978, delta=0.0001) + if i < 0: + return + + add_before_tanh = self.model.match_parent(mul_before_tanh, "Add", 0 if i == 1 else 1, output_name_to_node) + if add_before_tanh is None: + return + + if add_before_tanh.input[0] == root_input: + another = 1 + elif add_before_tanh.input[1] == root_input: + another = 0 + else: + return + + mul_after_pow = self.model.match_parent(add_before_tanh, "Mul", another, output_name_to_node) + if mul_after_pow is None: + return + + i = self.model.find_constant_input(mul_after_pow, 0.0447, delta=0.0001) + if i < 0: + return + + mul = self.model.match_parent(mul_after_pow, "Mul", 0 if i == 1 else 1, output_name_to_node) + if mul is None: + return + + if mul.input[0] == root_input: + another = 1 + elif mul.input[1] == root_input: + another = 0 + else: + return + + mul2 = self.model.match_parent(mul, "Mul", another, output_name_to_node) + if mul2 is None: + return + + if mul2.input[0] != root_input or mul2.input[1] != root_input: + return + + subgraph_nodes = [ + mul2, + mul, + mul_after_pow, + add_before_tanh, + mul_before_tanh, + tanh_node, + add_after_tanh, + mul_after_tanh, + mul_half, + ] 
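+ + # For reference, the subgraph collected above computes the tanh approximation of GELU: + # FastGelu(x) = 0.5 * x * (1.0 + tanh(0.79788456 * (x + 0.044715 * x**3))), + # where 0.79788456 is sqrt(2/pi). A minimal NumPy sketch of the same math, for illustration + # only (hypothetical helper, not used by the fusion): + # import numpy as np + # def fast_gelu_tanh(x): + # return 0.5 * x * (1.0 + np.tanh(0.79788456 * (x + 0.044715 * x**3)))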
+ + if not self.model.is_safe_to_fuse_nodes( + subgraph_nodes, + [mul_half.output[0]], + input_name_to_nodes, + output_name_to_node, + ): + return + + self.nodes_to_remove.extend(subgraph_nodes) + fused_node = helper.make_node( + "FastGelu", + inputs=[root_input], + outputs=mul_half.output, + name=self.model.create_node_name("FastGelu"), + ) + fused_node.domain = "com.microsoft" + self.nodes_to_add.append(fused_node) + self.node_name_to_graph_name[fused_node.name] = self.this_graph_name + return True diff --git a/onnxruntime/python/tools/transformers/fusion_gelu.py b/onnxruntime/python/tools/transformers/fusion_gelu.py index 6be5140c070d0..12f7d82a9c0af 100644 --- a/onnxruntime/python/tools/transformers/fusion_gelu.py +++ b/onnxruntime/python/tools/transformers/fusion_gelu.py @@ -3,7 +3,6 @@ # Licensed under the MIT License. # -------------------------------------------------------------------------- from logging import getLogger -from typing import Dict, Optional from fusion_base import Fusion from onnx import helper @@ -16,14 +15,14 @@ class FusionGelu(Fusion): def __init__(self, model: OnnxModel): super().__init__(model, "Gelu", "Erf") - def fuse(self, erf_node, input_name_to_nodes: Dict, output_name_to_node: Dict): + def fuse(self, erf_node, input_name_to_nodes: dict, output_name_to_node: dict): if self.fuse_1(erf_node, input_name_to_nodes, output_name_to_node): return if self.fuse_2(erf_node, input_name_to_nodes, output_name_to_node): return self.fuse_3(erf_node, input_name_to_nodes, output_name_to_node) - def fuse_1(self, erf_node, input_name_to_nodes: Dict, output_name_to_node: Dict) -> Optional[bool]: + def fuse_1(self, erf_node, input_name_to_nodes: dict, output_name_to_node: dict) -> bool | None: """ This pattern is from PyTorch model Fuse Gelu with Erf into one node: @@ -107,7 +106,7 @@ def fuse_1(self, erf_node, input_name_to_nodes: Dict, output_name_to_node: Dict) self.increase_counter("Gelu") return True - def fuse_2(self, erf_node, input_name_to_nodes: Dict, output_name_to_node: Dict) -> Optional[bool]: + def fuse_2(self, erf_node, input_name_to_nodes: dict, output_name_to_node: dict) -> bool | None: """ This pattern is from Keras model Fuse Gelu with Erf into one node: @@ -184,7 +183,7 @@ def fuse_2(self, erf_node, input_name_to_nodes: Dict, output_name_to_node: Dict) self.increase_counter("Gelu") return True - def fuse_3(self, erf_node, input_name_to_nodes: Dict, output_name_to_node: Dict) -> Optional[bool]: + def fuse_3(self, erf_node, input_name_to_nodes: dict, output_name_to_node: dict) -> bool | None: """ This pattern is from TensorFlow model Fuse Gelu with Erf into one node: diff --git a/onnxruntime/python/tools/transformers/fusion_gemmfastgelu.py b/onnxruntime/python/tools/transformers/fusion_gemmfastgelu.py index 4d9913f427b37..23eee1413ff9f 100644 --- a/onnxruntime/python/tools/transformers/fusion_gemmfastgelu.py +++ b/onnxruntime/python/tools/transformers/fusion_gemmfastgelu.py @@ -4,7 +4,6 @@ # -------------------------------------------------------------------------- from logging import getLogger -from typing import Dict, List, Union from fusion_base import Fusion from fusion_utils import NumpyHelper @@ -20,13 +19,13 @@ def __init__(self, model: OnnxModel): self.shape_infer = None self.shape_infer_done = False - def get_dimensions_from_tensor_proto(self, tensor_proto: TensorProto) -> Union[int, None]: + def get_dimensions_from_tensor_proto(self, tensor_proto: TensorProto) -> int | None: if tensor_proto.type.tensor_type.HasField("shape"): return 
len(tensor_proto.type.tensor_type.shape.dim) else: return None - def get_dimensions(self, input_name: str) -> Union[int, None]: + def get_dimensions(self, input_name: str) -> int | None: graph_input = self.model.find_graph_input(input_name) if graph_input: return self.get_dimensions_from_tensor_proto(graph_input) @@ -43,8 +42,8 @@ def get_dimensions(self, input_name: str) -> Union[int, None]: def fuse( self, node: NodeProto, - input_name_to_nodes: Dict[str, List[NodeProto]], - output_name_to_node: Dict[str, NodeProto], + input_name_to_nodes: dict[str, list[NodeProto]], + output_name_to_node: dict[str, NodeProto], ): """ This pattern is from PyTorch bert model diff --git a/onnxruntime/python/tools/transformers/fusion_group_norm.py b/onnxruntime/python/tools/transformers/fusion_group_norm.py index c718d2c27e015..2efec3e6ac6e8 100644 --- a/onnxruntime/python/tools/transformers/fusion_group_norm.py +++ b/onnxruntime/python/tools/transformers/fusion_group_norm.py @@ -3,7 +3,6 @@ # Licensed under the MIT License. # -------------------------------------------------------------------------- from logging import getLogger -from typing import Dict import numpy as np from fusion_base import Fusion @@ -18,7 +17,7 @@ def __init__(self, model: OnnxModel, channels_last=True): super().__init__(model, "GroupNorm", "Add") self.channels_last = channels_last - def fuse(self, add_node, input_name_to_nodes: Dict, output_name_to_node: Dict): + def fuse(self, add_node, input_name_to_nodes: dict, output_name_to_node: dict): """ Fuse Group Normalization subgraph into one node GroupNorm. The following is the pattern with swish activation: @@ -84,6 +83,7 @@ def fuse(self, add_node, input_name_to_nodes: Dict, output_name_to_node: Dict): instance_norm_scale = self.model.get_constant_value(instance_norm.input[1]) if instance_norm_scale is None or len(instance_norm_scale.shape) != 1: return + num_groups = int(instance_norm_scale.shape[0]) instance_norm_bias = self.model.get_constant_value(instance_norm.input[2]) if instance_norm_bias is None or instance_norm_scale.shape != instance_norm_scale.shape: @@ -156,7 +156,8 @@ def fuse(self, add_node, input_name_to_nodes: Dict, output_name_to_node: Dict): ) new_node.attribute.extend(instance_norm.attribute) - new_node.attribute.extend([helper.make_attribute("groups", 32)]) + + new_node.attribute.extend([helper.make_attribute("groups", num_groups)]) new_node.attribute.extend([helper.make_attribute("activation", 1 if has_swish_activation else 0)]) if not self.channels_last: diff --git a/onnxruntime/python/tools/transformers/fusion_layernorm.py b/onnxruntime/python/tools/transformers/fusion_layernorm.py index aac05a7f01325..1c96c54d9de35 100644 --- a/onnxruntime/python/tools/transformers/fusion_layernorm.py +++ b/onnxruntime/python/tools/transformers/fusion_layernorm.py @@ -3,7 +3,6 @@ # Licensed under the MIT License. 
# -------------------------------------------------------------------------- from logging import getLogger -from typing import Dict, List from fusion_base import Fusion from onnx import TensorProto, helper @@ -13,19 +12,21 @@ class FusionLayerNormalization(Fusion): - def __init__(self, model: OnnxModel): + def __init__(self, model: OnnxModel, check_constant_and_dimension: bool = True, force: bool = False): super().__init__(model, "LayerNormalization", "ReduceMean") + self.check_constant_and_dimension = check_constant_and_dimension + self.force = force - def fuse(self, node, input_name_to_nodes: Dict, output_name_to_node: Dict): + def fuse(self, node, input_name_to_nodes: dict, output_name_to_node: dict): """ Fuse Layer Normalization subgraph into one node LayerNormalization: +----------------------+ | | | v [Root] --> ReduceMean --> Sub --> Pow --> ReduceMean --> Add --> Sqrt --> Div --> Mul --> Add - (axis=2 or -1) | (Y=2) (axis=2 or -1) (E-6 or E-12 or 0) ^ - | | - +-----------------------------------------------+ + (axis=2 or -1) | (Y=2) (axis=2 or -1) (B=E-6 or E-12) ^ + | | + +-------------------------------------------------+ It also handles cases of duplicated sub nodes exported from older version of PyTorch: +----------------------+ @@ -56,18 +57,20 @@ def fuse(self, node, input_name_to_nodes: Dict, output_name_to_node: Dict): for child in children: # Check if Sub --> Div exists div_node_1 = self.model.find_first_child_by_type(child, "Div", input_name_to_nodes, recursive=False) - - # Check if Sub --> Cast --> Div - div_node_2 = self.model.match_child_path(child, ["Cast", "Div"], exclude=[]) - if div_node_1 is not None: div_node = div_node_1 - elif div_node_2 is not None: - div_node = div_node_2[-1] + break + else: + # Check if Sub --> Cast --> Div + div_node_2 = self.model.match_child_path(child, ["Cast", "Div"]) + if div_node_2 is not None: + div_node = div_node_2[-1] + break + if div_node is None: return - path_id, parent_nodes, _ = self.model.match_parent_paths( + _path_id, parent_nodes, _ = self.model.match_parent_paths( div_node, [ (["Sqrt", "Add", "ReduceMean", "Pow", "Sub"], [1, 0, 0, 0, 0]), @@ -75,72 +78,93 @@ def fuse(self, node, input_name_to_nodes: Dict, output_name_to_node: Dict): ], output_name_to_node, ) - if path_id < 0: + if parent_nodes is None: return sub_node = parent_nodes[-1] if sub_node not in children: return - second_add_node = parent_nodes[1] - i, add_weight = self.model.get_constant_input(second_add_node) - if add_weight is None or add_weight <= 0 or add_weight > 1.0e-4: - logger.debug(f"skip SkipLayerNormalization fusion since epsilon value is not expected: {add_weight}") + add_eps_node = parent_nodes[1] + i, epsilon = self.model.get_constant_input(add_eps_node) + if epsilon is None or epsilon <= 0 or epsilon > 1.0e-4: + logger.debug(f"skip SkipLayerNormalization fusion since epsilon value is not expected: {epsilon}") return pow_node = parent_nodes[3] if self.model.find_constant_input(pow_node, 2.0) != 1: return - temp_node = input_name_to_nodes[div_node.output[0]][0] - if temp_node.op_type == "Cast": - # Div --> Cast --> Mul - subgraph_nodes.append(temp_node) # add Cast node to list of subgraph nodes - mul_node = input_name_to_nodes[temp_node.output[0]][0] - else: - # Div --> Mul - mul_node = temp_node - if mul_node.op_type != "Mul": - return - - last_add_node = input_name_to_nodes[mul_node.output[0]][0] - if last_add_node.op_type != "Add": - return - - subgraph_nodes.append(node) - subgraph_nodes.extend(children) - 
subgraph_nodes.extend(parent_nodes[:-1]) - - subgraph_nodes.extend([last_add_node, mul_node, div_node]) - if not self.model.is_safe_to_fuse_nodes( - subgraph_nodes, - last_add_node.output, - input_name_to_nodes, - output_name_to_node, - ): - logger.debug("It is not safe to fuse LayerNormalization node. Skip") - return - - node_before_weight = div_node if temp_node.op_type != "Cast" else temp_node - weight_input = mul_node.input[1 - self.model.input_index(node_before_weight.output[0], mul_node)] - if not self.model.is_constant_with_specified_dimension(weight_input, 1, "layernorm weight"): - return - - bias_input = last_add_node.input[1 - self.model.input_index(mul_node.output[0], last_add_node)] - if not self.model.is_constant_with_specified_dimension(bias_input, 1, "layernorm bias"): - return - - self.nodes_to_remove.extend(subgraph_nodes) - - normalize_node = helper.make_node( - "LayerNormalization", - inputs=[node.input[0], weight_input, bias_input], - outputs=[last_add_node.output[0]], - name=self.model.create_node_name("LayerNormalization", name_prefix="LayerNorm"), - ) - normalize_node.attribute.extend([helper.make_attribute("epsilon", float(add_weight))]) - self.nodes_to_add.append(normalize_node) - self.node_name_to_graph_name[normalize_node.name] = self.this_graph_name + if div_node.output[0] not in input_name_to_nodes: + return + + # In the MMDiT model, Div might have two Mul+Add children paths. + div_children = input_name_to_nodes[div_node.output[0]] + for temp_node in div_children: + if temp_node.op_type == "Cast": + # Div --> Cast --> Mul + subgraph_nodes.append(temp_node) # add Cast node to list of subgraph nodes + if temp_node.output[0] not in input_name_to_nodes: + continue + mul_node = input_name_to_nodes[temp_node.output[0]][0] + else: + # Div --> Mul + mul_node = temp_node + if mul_node.op_type != "Mul": + continue + + if mul_node.output[0] not in input_name_to_nodes: + continue + last_add_node = input_name_to_nodes[mul_node.output[0]][0] + if last_add_node.op_type != "Add": + continue + + subgraph_nodes.append(node) + subgraph_nodes.extend(children) + subgraph_nodes.extend(parent_nodes[:-1]) + + subgraph_nodes.extend([last_add_node, mul_node, div_node]) + + node_before_weight = div_node if temp_node.op_type != "Cast" else temp_node + weight_input = mul_node.input[1 - self.model.input_index(node_before_weight.output[0], mul_node)] + if self.check_constant_and_dimension and not self.model.is_constant_with_specified_dimension( + weight_input, 1, "layernorm weight" + ): + continue + + bias_input = last_add_node.input[1 - self.model.input_index(mul_node.output[0], last_add_node)] + if self.check_constant_and_dimension and not self.model.is_constant_with_specified_dimension( + bias_input, 1, "layernorm bias" + ): + continue + + layer_norm_output = last_add_node.output[0] + if not self.model.is_safe_to_fuse_nodes( + subgraph_nodes, + last_add_node.output, + input_name_to_nodes, + output_name_to_node, + ): + # If it is not safe to fuse, some computation may be duplicated if we force the fusion. + # It is unknown whether forced fusion brings a performance gain or loss. + # Users need to test the performance impact to see whether forcing fusion helps. + if self.force: + self.prune_graph = True + else: + logger.debug("It is not safe to fuse LayerNormalization node.
Skip") + continue + else: + self.nodes_to_remove.extend(subgraph_nodes) + + normalize_node = helper.make_node( + "LayerNormalization", + inputs=[node.input[0], weight_input, bias_input], + outputs=[layer_norm_output], + name=self.model.create_node_name("LayerNormalization", name_prefix="LayerNorm"), + ) + normalize_node.attribute.extend([helper.make_attribute("epsilon", float(epsilon))]) + self.nodes_to_add.append(normalize_node) + self.node_name_to_graph_name[normalize_node.name] = self.this_graph_name class FusionLayerNormalizationNCHW(Fusion): @@ -159,7 +183,7 @@ def get_weight_or_bias(self, output_name, description): return value.reshape([value.shape[0]]) - def create_transpose_node(self, input_name: str, perm: List[int], output_name=None): + def create_transpose_node(self, input_name: str, perm: list[int], output_name=None): """Append a Transpose node after an input""" node_name = self.model.create_node_name("Transpose") @@ -171,7 +195,7 @@ def create_transpose_node(self, input_name: str, perm: List[int], output_name=No return transpose_node - def fuse(self, node, input_name_to_nodes: Dict, output_name_to_node: Dict): + def fuse(self, node, input_name_to_nodes: dict, output_name_to_node: dict): """ Fuse Layer Normalization subgraph into one node LayerNormalization: +----------------------+ @@ -218,9 +242,9 @@ def fuse(self, node, input_name_to_nodes: Dict, output_name_to_node: Dict): if sub != sub_node: return - i, add_weight = self.model.get_constant_input(second_add_node) - if add_weight is None or add_weight <= 0 or add_weight > 1.0e-4: - logger.debug(f"skip SkipLayerNormalization fusion since epsilon value is not expected: {add_weight}") + i, epsilon = self.model.get_constant_input(second_add_node) + if epsilon is None or epsilon <= 0 or epsilon > 1.0e-4: + logger.debug(f"skip SkipLayerNormalization fusion since epsilon value is not expected: {epsilon}") return axes = OnnxModel.get_node_attribute(reduce_mean_node, "axes") @@ -286,7 +310,7 @@ def fuse(self, node, input_name_to_nodes: Dict, output_name_to_node: Dict): outputs=[layernorm_node_name + "_out_nhwc"], name=layernorm_node_name, ) - normalize_node.attribute.extend([helper.make_attribute("epsilon", float(add_weight))]) + normalize_node.attribute.extend([helper.make_attribute("epsilon", float(epsilon))]) self.nodes_to_add.append(transpose_input) self.nodes_to_add.append(normalize_node) @@ -303,7 +327,7 @@ class FusionLayerNormalizationTF(Fusion): def __init__(self, model: OnnxModel): super().__init__(model, "LayerNormalization", "Add", "TF") - def fuse(self, node, input_name_to_nodes: Dict, output_name_to_node: Dict): + def fuse(self, node, input_name_to_nodes: dict, output_name_to_node: dict): """ Layer Norm from Tensorflow model(using keras2onnx or tf2onnx): +------------------------------------+ diff --git a/onnxruntime/python/tools/transformers/fusion_mha_mmdit.py b/onnxruntime/python/tools/transformers/fusion_mha_mmdit.py new file mode 100644 index 0000000000000..48f6f9a9686ee --- /dev/null +++ b/onnxruntime/python/tools/transformers/fusion_mha_mmdit.py @@ -0,0 +1,667 @@ +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. 
+# -------------------------------------------------------------------------- +from logging import getLogger + +import numpy as np +from fusion_base import Fusion +from fusion_utils import FusionUtils +from onnx import NodeProto, TensorProto, helper, numpy_helper +from onnx_model import OnnxModel + +logger = getLogger(__name__) + + +class FusionMultiHeadAttentionMMDit(Fusion): + """ + Fuse MultiHeadAttention for Multimodal Diffusion Transformer (MMDiT). + """ + + def __init__(self, model: OnnxModel): + super().__init__(model, fused_op_type="MultiHeadAttention", search_op_types=["Softmax"]) + self.unsqueeze_update_map = {} + + def get_num_heads(self, start_node: NodeProto, output_name_to_node, input_index=0) -> int: + """ + Detect num_heads from Reshape & Transpose of q/k/v for both Stable Diffusion 3.x and Flux 1.x: + + MatMul .. [-1] [24] .. + | | | / / + Add Concat(axis=0) + | / + Reshape + | + Transpose(perm=0,1,3,2) + | + (start_node) + """ + nodes = self.model.match_parent_path( + start_node, ["Transpose", "Reshape", "Concat"], [input_index, 0, 1], output_name_to_node=output_name_to_node + ) + if nodes is None: + return 0 + + concat_shape = nodes[-1] + if len(concat_shape.input) != 4: + return 0 + + value = self.model.get_constant_value(concat_shape.input[2]) + if value is None: + return 0 + + if len(value.shape) != 1: + return 0 + + return int(value[0]) + + def get_num_heads_from_k(self, transpose_k: NodeProto, output_name_to_node, concat_before_transpose: bool) -> int: + """ + Detect num_heads from a subgraph like the following (num_heads=24 in this example): + MatMul .. [-1] [24] .. + | | | / / + Add Concat + | / + Reshape + | + Transpose(perm=0,2,1,3) + | + SimplifiedLayerNormalization + | + Transpose(perm=0,1,3,2) + + Another variant uses an extra Concat node to join two symmetrical subgraphs: + + | | + MatMul MatMul .. [-1] [24] .. + | | | | / / + Add Concat Add Concat + | / | / + Reshape Reshape + | | + Transpose Transpose(perm=0,2,1,3) + | | + SimplifiedLayerNormalization SimplifiedLayerNormalization + | / + Concat + | + Transpose(perm=0,1,3,2) + + Both patterns are used in the Stable Diffusion 3.5 model. + """ + if concat_before_transpose: + nodes = self.model.match_parent_path( + transpose_k, ["Concat", "SimplifiedLayerNormalization"], [0, 1], output_name_to_node=output_name_to_node + ) + if nodes: + return self.get_num_heads(nodes[1], output_name_to_node) + else: + nodes = self.model.match_parent_path( + transpose_k, ["SimplifiedLayerNormalization"], [0], output_name_to_node=output_name_to_node + ) + if nodes: + return self.get_num_heads(nodes[0], output_name_to_node) + + return 0 + + def reshape_to_3d(self, input_name: str, output_name: str) -> str: + """Add a Reshape node to convert 4D BxSxNxH to 3D BxSxD. + + Args: + input_name (str): input name for the 4D tensor of shape BxSxNxH. + output_name (str): output name for the 3D tensor of shape BxSxD, where D = N * H.
+ + Returns: + str: the output name + """ + + new_dims_name = "bsnh_to_bsd_reshape_dims" + new_dims = self.model.get_initializer(new_dims_name) + if new_dims is None: + new_dims = numpy_helper.from_array(np.array([0, 0, -1], dtype="int64"), name=new_dims_name) + self.model.add_initializer(new_dims, self.this_graph_name) + reshape_q = helper.make_node( + "Reshape", + inputs=[input_name, new_dims_name], + outputs=[output_name], + name=self.model.create_node_name("Reshape"), + ) + self.nodes_to_add.append(reshape_q) + self.node_name_to_graph_name[reshape_q.name] = self.this_graph_name + return reshape_q.output[0] + + def adjust_query_from_bnsh_to_bsd_no_concat(self, mul_q: NodeProto, output_name_to_node) -> str | None: + """ + MultiHeadAttention requires query in BSD format. This function adjusts query from BNSH to BSD format. + + Before: + MatMul + | + Add Concat + | / + Reshape + | + Transpose(perm=0,2,1,3) + | + SimplifiedLayerNorm + | + Mul + + After: + MatMul + | + Add Concat + | / + Reshape + | + SimplifiedLayerNorm + | + Reshape (shape=[0, 0, -1]) + """ + + path = self.model.match_parent_path( + mul_q, + ["SimplifiedLayerNormalization", "Transpose"], + [0, 0], + ) + if path is None: + return None + sln_a, transpose_a = path + + if not FusionUtils.check_node_attribute(transpose_a, "perm", [0, 2, 1, 3]): + return None + + # Update the graph + sln_a.input[0] = transpose_a.input[0] + sln_output = sln_a.output[0] + sln_a.output[0] = sln_output + "_BSNH" + + return self.reshape_to_3d(sln_a.output[0], sln_output + "_BSD") + + def adjust_query_from_bnsh_to_bsd(self, mul_q: NodeProto, output_name_to_node) -> str | None: + """ + MultiHeadAttention requires query in BSD format. This function adjusts query from BNSH to BSD format. + + Before: + MatMul MatMul + | | + Add Concat Add Concat + | / | / + Reshape Reshape + | | + Transpose(perm=0,2,1,3) Transpose(perm=0,2,1,3) + | | + SimplifiedLayerNorm SimplifiedLayerNorm + | / + Concat(axis=2) + | + Mul + + After: + MatMul MatMul + | | + Add Concat Add Concat + | / | / + Reshape Reshape + | | + SimplifiedLayerNorm SimplifiedLayerNorm + | / + Concat(axis=1) + | + Reshape (shape=[0, 0, -1]) + """ + + path = self.model.match_parent_path( + mul_q, + ["Concat", "SimplifiedLayerNormalization", "Transpose"], + [0, 0, 0], + ) + if path is None: + return None + concat, sln_a, transpose_a = path + + if len(concat.input) != 2: + return None + + path = self.model.match_parent_path( + concat, + ["SimplifiedLayerNormalization", "Transpose"], + [1, 0], + ) + if path is None: + return None + sln_b, transpose_b = path + + if not FusionUtils.check_node_attribute(transpose_a, "perm", [0, 2, 1, 3]): + return None + + if not FusionUtils.check_node_attribute(transpose_b, "perm", [0, 2, 1, 3]): + return None + + if not FusionUtils.check_node_attribute(concat, "axis", 2): + return None + + # Update the graph + sln_a.input[0] = transpose_a.input[0] + sln_b.input[0] = transpose_b.input[0] + + new_concat_node = helper.make_node( + "Concat", + inputs=[sln_a.output[0], sln_b.output[0]], + outputs=[concat.output[0] + "_BSNH"], + name=self.model.create_node_name("Concat"), + axis=1, + ) + self.nodes_to_add.append(new_concat_node) + self.node_name_to_graph_name[new_concat_node.name] = self.this_graph_name + + return self.reshape_to_3d(new_concat_node.output[0], concat.output[0] + "_BSD") + + def update_unsqueeze_axes_1_to_2(self, unsqueeze: NodeProto) -> str: + updated_unsqueeze_output = self.unsqueeze_update_map.get(unsqueeze.name) + if updated_unsqueeze_output is None: + if
len(unsqueeze.input) == 1: + new_node = helper.make_node( + "Unsqueeze", + inputs=unsqueeze.input, + outputs=[unsqueeze.output[0] + "_BSNH"], + name=self.model.create_node_name("Unsqueeze"), + axes=[2], + ) + else: + initializer_name = "unsqueeze_axes_2" + if self.model.get_initializer(initializer_name) is None: + unsqueeze_axes_2 = helper.make_tensor( + name=initializer_name, + data_type=TensorProto.INT64, + dims=[1], # Shape of the tensor + vals=[2], # Tensor values + ) + self.model.add_initializer(unsqueeze_axes_2, self.this_graph_name) + + new_node = helper.make_node( + "Unsqueeze", + inputs=[unsqueeze.input[0], initializer_name], + outputs=[unsqueeze.output[0] + "_BSNH"], + name=self.model.create_node_name("Unsqueeze"), + ) + + self.nodes_to_add.append(new_node) + self.node_name_to_graph_name[new_node.name] = self.this_graph_name + updated_unsqueeze_output = new_node.output[0] + self.unsqueeze_update_map[unsqueeze.name] = updated_unsqueeze_output + + return updated_unsqueeze_output + + def update_unsqueeze_axes(self, add: NodeProto, output_name_to_node: dict[str, NodeProto]) -> bool: + """ + Update axes of Unsqueeze from [1] to [2] in the following pattern: + Unsqueeze Unsqueeze + (axes=[0]) (axes=[0]) + | | + Unsqueeze Unsqueeze + ... (axes=[1]) ... (axes=[1]) + | / | / + Mul Mul + | / + Add + Args: + add (NodeProto): the Add node + output_name_to_node (Dict[str, NodeProto]): mapping from output name to node + + Returns: + bool: True if the pattern is matched and updated successfully, False otherwise. + """ + if len(add.input) != 2: + return False + + # Check axes of Unsqueeze nodes are [0] and [1], and change to [0] and [2] respectively. + nodes_b = self.model.match_parent_path(add, ["Mul", "Unsqueeze", "Unsqueeze"], [1, 1, 0], output_name_to_node) + if nodes_b is None: + return False + + fusion_utils = FusionUtils(self.model) + axes_1 = fusion_utils.get_squeeze_or_unsqueeze_axes(nodes_b[1]) + if axes_1 is None or axes_1 != [1]: + return False + + axes_0 = fusion_utils.get_squeeze_or_unsqueeze_axes(nodes_b[2]) + if axes_0 is None or axes_0 != [0]: + return False + + # Check axes of Unsqueeze nodes are [0] and [1], and change to [0] and [2] respectively. + nodes_a = self.model.match_parent_path(add, ["Mul", "Unsqueeze", "Unsqueeze"], [0, 1, 0], output_name_to_node) + if nodes_a is None: + return False + + axes_1 = fusion_utils.get_squeeze_or_unsqueeze_axes(nodes_a[1]) + if axes_1 is None or axes_1 != [1]: + return False + + axes_0 = fusion_utils.get_squeeze_or_unsqueeze_axes(nodes_a[2]) + if axes_0 is None or axes_0 != [0]: + return False + + nodes_a[0].input[1] = self.update_unsqueeze_axes_1_to_2(nodes_a[1]) + nodes_b[0].input[1] = self.update_unsqueeze_axes_1_to_2(nodes_b[1]) + return True + + def adjust_flux_query_from_bnsh_to_bsd(self, mul_q: NodeProto, output_name_to_node) -> str | None: + """ + Adjust graph to change query format from BNSH to BSD for Flux model. + Note that the graph pattern is complex, and we only do a shallow match here. 
+ + Before: + | | + Transpose(perm=0,2,1,3) Transpose(perm=0,2,1,3) + | | + SimplifiedLayerNorm SimplifiedLayerNorm + | / + Concat(axis=2) + | + Mul Mul + | / + Add + | + Mul + + After (Transpose nodes are removed, and a Reshape is added): + + | | + SimplifiedLayerNorm SimplifiedLayerNorm + | / + Concat(axis=1) + | + Mul Mul + | / + Add + | + Reshape (shape=[0, 0, -1]) + """ + + path = self.model.match_parent_path( + mul_q, + ["Add", "Mul", "Concat", "SimplifiedLayerNormalization", "Transpose"], + [0, 0, 0, 0, 0], + ) + if path is None: + return None + add, _mul_a, concat, sln_a, transpose_a = path + + if len(concat.input) != 2: + return None + + path = self.model.match_parent_path( + concat, + ["SimplifiedLayerNormalization", "Transpose"], + [1, 0], + ) + if path is None: + return None + sln_b, transpose_b = path + + if not FusionUtils.check_node_attribute(transpose_a, "perm", [0, 2, 1, 3]): + return None + + if not FusionUtils.check_node_attribute(transpose_b, "perm", [0, 2, 1, 3]): + return None + + if not FusionUtils.check_node_attribute(concat, "axis", 2): + return None + + # Need to adjust axes of Unsqueeze nodes from [1] to [2] so that the tensors to Mul nodes are BSNH instead of BNSH. + if not self.update_unsqueeze_axes(add, output_name_to_node): + return None + + # Update the graph + sln_a.input[0] = transpose_a.input[0] + sln_b.input[0] = transpose_b.input[0] + + new_concat_node = helper.make_node( + "Concat", + inputs=[sln_a.output[0], sln_b.output[0]], + outputs=[concat.output[0] + "_BSNH"], + name=self.model.create_node_name("Concat"), + axis=1, + ) + self.nodes_to_add.append(new_concat_node) + self.node_name_to_graph_name[new_concat_node.name] = self.this_graph_name + self.model.replace_input_of_all_nodes(concat.output[0], new_concat_node.output[0]) + + return self.reshape_to_3d(add.output[0], add.output[0] + "_BSD") + + def adjust_flux_single_query_from_bnsh_to_bsd(self, mul_q: NodeProto, output_name_to_node) -> str | None: + """ + Adjust graph to change query format from BNSH to BSD for Flux model. + Note that the graph pattern is complex, and we only do a shallow match here. + + Before: + | + Transpose(perm=0,2,1,3) + | + SimplifiedLayerNorm + | + Mul Mul + | / + Add + | + Mul + + After (Transpose is removed, and a Reshape is added): + + | + SimplifiedLayerNorm + | + Mul Mul + | / + Add + | + Reshape (shape=[0, 0, -1]) + """ + + path = self.model.match_parent_path( + mul_q, + ["Add", "Mul", "SimplifiedLayerNormalization", "Transpose"], + [0, 0, 0, 0], + ) + if path is None: + return None + add, _mul_a, sln_a, transpose_a = path + + if not FusionUtils.check_node_attribute(transpose_a, "perm", [0, 2, 1, 3]): + return None + + # Need to adjust axes of Unsqueeze nodes from [1] to [2] so that the tensors to Mul nodes are BSNH instead of BNSH.
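+ # For example, a rank-2 cos/sin table of shape (S, H) becomes (1, S, H) after Unsqueeze(axes=[0]); a second Unsqueeze with axes=[2] then gives (1, S, 1, H), which broadcasts against BSNH, whereas the original axes=[1] gives (1, 1, S, H) for BNSH.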
+ if not self.update_unsqueeze_axes(add, output_name_to_node): + return None + + # Update the graph + sln_a.input[0] = transpose_a.input[0] + add.output[0] = add.output[0] + "_BSNH" + + return self.reshape_to_3d(add.output[0], add.output[0] + "_BSD") + + def transpose_reshape_bnsh_to_bsd(self, q: str, output_name_to_node) -> str | None: + transpose_q = helper.make_node( + "Transpose", + [q], + [q + "_BSNH"], + name=self.model.create_node_name("Transpose", name_prefix="Transpose_BNSH_to_BSNH"), + perm=[0, 2, 1, 3], + ) + self.nodes_to_add.append(transpose_q) + self.node_name_to_graph_name[transpose_q.name] = self.this_graph_name + + return self.reshape_to_3d(q + "_BSNH", q + "_BSD") + + def create_multihead_attention_node( + self, + q: str, + k: str, + v: str, + output: str, + num_heads: int, + ) -> NodeProto: + """ + Create a MultiHeadAttention node. + + Args: + q (str): name of q + k (str): name of k + v (str): name of v + output (str): output name of MHA + num_heads (int): number of attention heads. If a model is pruned, it is the number of heads after pruning. + + Returns: + NodeProto: the node created. + """ + + assert num_heads > 0 + + # Add inputs for MHA: Query, Key, Value (Proj_Bias, Mask, Attention_Bias, Past_K, Past_V are optional) + mha_inputs = [q, k, v] + + # Add outputs for MHA (Present_K, Present_V are optional) + mha_outputs = [output] + + mha_node = helper.make_node( + "MultiHeadAttention", + inputs=mha_inputs, + outputs=mha_outputs, + name=self.model.create_node_name("MultiHeadAttention"), + ) + + mha_node.domain = "com.microsoft" + mha_node.attribute.extend([helper.make_attribute("num_heads", num_heads)]) + + # No mask is used in the MMDiT model, so we need not set the optional mask_filter_value attribute. + return mha_node + + def fuse(self, node, input_name_to_nodes, output_name_to_node): + assert node.op_type == "Softmax" + softmax = node + + # Softmax output shall not be a graph output. + if self.model.find_graph_output(softmax.output[0]): + return + + nodes = self.model.match_child_path( + softmax, ["MatMul", "Transpose", "Reshape"], [(0, 0), (0, 0), (0, 0)], input_name_to_nodes + ) + if nodes is None: + return + + matmul_s_v, transpose_out, reshape_out = nodes + if not FusionUtils.check_node_attribute(transpose_out, "perm", [0, 2, 1, 3]): + return + + q_nodes = self.model.match_parent_path( + softmax, + ["MatMul", "Mul", "Sqrt", "Div", "Sqrt", "Cast", "Slice", "Shape"], + [0, 0, 1, 0, 1, 0, 0, 0], + ) + + if q_nodes is None: + return + + matmul_qk, mul_q, sqrt_q_2, div_q, sqrt_q, _, _, shape_q = q_nodes + + q_bnsh = mul_q.input[0] + if q_bnsh != shape_q.input[0]: + return + + k_nodes = self.model.match_parent_path(matmul_qk, ["Mul", "Transpose"], [1, 0]) + if k_nodes is None: + return + + mul_k, transpose_k = k_nodes + k = transpose_k.input[0] + if not FusionUtils.check_node_attribute(transpose_k, "perm", [0, 1, 3, 2]): + return + + k_scale_nodes = self.model.match_parent_path(mul_k, ["Sqrt", "Div"], [1, 0]) + if k_scale_nodes is None: + return + if k_scale_nodes[0].input[0] != sqrt_q_2.input[0]: + return + + v = matmul_s_v.input[1] + + # Here we sanity check the v path to make sure it is in the expected BNSH format.
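+ # Two layouts are accepted: two Transpose(perm=[0,2,1,3]) outputs joined by a Concat (e.g., the dual text/image streams in MMDiT), or a single Transpose(perm=[0,2,1,3]) output.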
+ concat_v = self.model.match_parent(matmul_s_v, "Concat", input_index=1, output_name_to_node=output_name_to_node) + if concat_v is not None: + # Match v path like: + # -- Transpose (perm=[0,2,1,3]) ----+ + # | + # v + # -- Transpose (perm=[0,2,1,3]) -> Concat -> (v) + transpose_1 = self.model.match_parent( + concat_v, "Transpose", input_index=0, output_name_to_node=output_name_to_node + ) + if transpose_1 is None: + return + if not FusionUtils.check_node_attribute(transpose_1, "perm", [0, 2, 1, 3]): + return + + transpose_2 = self.model.match_parent( + concat_v, "Transpose", input_index=1, output_name_to_node=output_name_to_node + ) + if transpose_2 is None: + return + if not FusionUtils.check_node_attribute(transpose_2, "perm", [0, 2, 1, 3]): + return + else: + # Match v path like: + # -- Transpose (perm=[0,2,1,3]) -> (v) + transpose_1 = self.model.match_parent( + matmul_s_v, "Transpose", input_index=1, output_name_to_node=output_name_to_node + ) + if transpose_1 is None: + return + if not FusionUtils.check_node_attribute(transpose_1, "perm", [0, 2, 1, 3]): + return + + # Match patterns for Flux. + num_heads = ( + self.get_num_heads(concat_v, output_name_to_node) + if concat_v + else self.get_num_heads(matmul_s_v, output_name_to_node, input_index=1) + ) + + if num_heads == 0: + # Match patterns for Stable Diffusion 3.5. + num_heads = self.get_num_heads_from_k(transpose_k, output_name_to_node, concat_v is not None) + if num_heads <= 0: + return + + # Q is in BNSH format, so we need to adjust it to BSD format due to a limitation of the MHA op. + # TODO: support BNSH format in the MHA op to reduce the fusion effort. + if concat_v is not None: + query = self.adjust_query_from_bnsh_to_bsd(mul_q, output_name_to_node) + else: + query = self.adjust_query_from_bnsh_to_bsd_no_concat(mul_q, output_name_to_node) + + if query is None: + query = self.adjust_flux_query_from_bnsh_to_bsd(mul_q, output_name_to_node) + if query is None: + query = self.adjust_flux_single_query_from_bnsh_to_bsd(mul_q, output_name_to_node) + if query is None: + # Fall back to using Transpose and Reshape to adjust the query from BNSH to BSD. + # This is a more general approach. + # However, it might be slower if the extra Transpose node cannot be removed by ORT optimizer.
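+ # Reshape with shape [0, 0, -1] copies the first two dimensions from the input (0 means copy) and infers the last one (-1), i.e. (B, S, N, H) -> (B, S, N*H).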
+ query = self.transpose_reshape_bnsh_to_bsd(q_bnsh, output_name_to_node) + + new_node = self.create_multihead_attention_node( + q=query, + k=k, + v=v, + output=reshape_out.output[0], + num_heads=num_heads, + ) + self.nodes_to_add.append(new_node) + self.node_name_to_graph_name[new_node.name] = self.this_graph_name + + self.nodes_to_remove.extend([matmul_s_v, transpose_out, reshape_out]) + + # Use prune graph to remove nodes + self.prune_graph = True diff --git a/onnxruntime/python/tools/transformers/fusion_nhwc_conv.py b/onnxruntime/python/tools/transformers/fusion_nhwc_conv.py index 5233fdf272fbd..0ad50a270caf7 100644 --- a/onnxruntime/python/tools/transformers/fusion_nhwc_conv.py +++ b/onnxruntime/python/tools/transformers/fusion_nhwc_conv.py @@ -4,7 +4,6 @@ # -------------------------------------------------------------------------- from logging import getLogger -from typing import List from fusion_base import Fusion from fusion_utils import FusionUtils @@ -22,7 +21,7 @@ def __init__(self, model: OnnxModel, update_weight=False): self.update_weight = update_weight self.fusion_utils = FusionUtils(model) - def create_transpose_node(self, input_name: str, perm: List[int], output_name=None): + def create_transpose_node(self, input_name: str, perm: list[int], output_name=None): """Append a Transpose node after an input""" node_name = self.model.create_node_name("Transpose") diff --git a/onnxruntime/python/tools/transformers/fusion_qordered_attention.py b/onnxruntime/python/tools/transformers/fusion_qordered_attention.py index fb020298bc210..52ccfc6fe368d 100644 --- a/onnxruntime/python/tools/transformers/fusion_qordered_attention.py +++ b/onnxruntime/python/tools/transformers/fusion_qordered_attention.py @@ -4,7 +4,6 @@ # -------------------------------------------------------------------------- from logging import getLogger -from typing import Tuple import numpy as np from fusion_attention import AttentionMask @@ -30,7 +29,7 @@ def __init__( super().__init__(model, "QOrderedAttention", "QOrderedLayerNormalization") - def get_num_heads_and_hidden_size(self, reshape_q: NodeProto) -> Tuple[int, int]: + def get_num_heads_and_hidden_size(self, reshape_q: NodeProto) -> tuple[int, int]: """Detect num_heads and hidden_size from a reshape node. 
Args: reshape_q (NodeProto): reshape node for Q diff --git a/onnxruntime/python/tools/transformers/fusion_qordered_gelu.py b/onnxruntime/python/tools/transformers/fusion_qordered_gelu.py index 5f395b364eb6f..6a6b52a988c00 100644 --- a/onnxruntime/python/tools/transformers/fusion_qordered_gelu.py +++ b/onnxruntime/python/tools/transformers/fusion_qordered_gelu.py @@ -4,7 +4,6 @@ # -------------------------------------------------------------------------- from logging import getLogger -from typing import Dict from fusion_base import Fusion from fusion_utils import FusionUtils @@ -18,7 +17,7 @@ class FusionQOrderedGelu(Fusion): def __init__(self, model: OnnxModel): super().__init__(model, "QOrderedGelu", ["Gelu", "FastGelu"]) - def fuse(self, node, input_name_to_nodes: Dict, output_name_to_node: Dict): + def fuse(self, node, input_name_to_nodes: dict, output_name_to_node: dict): """ INPUT PATTERN Fuse (quantized) Gelu subgraph into one node QOrderedGelu: diff --git a/onnxruntime/python/tools/transformers/fusion_qordered_layernorm.py b/onnxruntime/python/tools/transformers/fusion_qordered_layernorm.py index 5ec6dadc1e677..c8b1be71d4616 100644 --- a/onnxruntime/python/tools/transformers/fusion_qordered_layernorm.py +++ b/onnxruntime/python/tools/transformers/fusion_qordered_layernorm.py @@ -3,7 +3,6 @@ # Licensed under the MIT License. # -------------------------------------------------------------------------- from logging import getLogger -from typing import Dict from fusion_base import Fusion from fusion_utils import FusionUtils @@ -17,7 +16,7 @@ class FusionQOrderedLayerNormalization(Fusion): def __init__(self, model: OnnxModel): super().__init__(model, "QOrderedLayerNormalization", "LayerNormalization") - def fuse(self, node, input_name_to_nodes: Dict, output_name_to_node: Dict): + def fuse(self, node, input_name_to_nodes: dict, output_name_to_node: dict): """ Fuse (quantized) Layer Normalization subgraph into one node QOrderedLayerNormalization: quantized input -> DQ diff --git a/onnxruntime/python/tools/transformers/fusion_qordered_matmul.py b/onnxruntime/python/tools/transformers/fusion_qordered_matmul.py index 681160479faef..3a373f3fd4d78 100644 --- a/onnxruntime/python/tools/transformers/fusion_qordered_matmul.py +++ b/onnxruntime/python/tools/transformers/fusion_qordered_matmul.py @@ -4,7 +4,6 @@ # -------------------------------------------------------------------------- from logging import getLogger -from typing import Dict from fusion_base import Fusion from fusion_utils import FusionUtils @@ -18,7 +17,7 @@ class FusionQOrderedMatMul(Fusion): def __init__(self, model: OnnxModel): super().__init__(model, "QOrderedMatMul", "MatMul") - def fuse(self, node, input_name_to_nodes: Dict, output_name_to_node: Dict): + def fuse(self, node, input_name_to_nodes: dict, output_name_to_node: dict): matmul_children = self.model.get_children(node, input_name_to_nodes) # Should only have 1 child - Bias Add diff --git a/onnxruntime/python/tools/transformers/fusion_rotary_attention.py b/onnxruntime/python/tools/transformers/fusion_rotary_attention.py index efdcbcfb3dcdc..6657fde2257e5 100644 --- a/onnxruntime/python/tools/transformers/fusion_rotary_attention.py +++ b/onnxruntime/python/tools/transformers/fusion_rotary_attention.py @@ -3,7 +3,6 @@ # Licensed under the MIT License. 
# -------------------------------------------------------------------------- import logging -from typing import Optional, Union from fusion_attention import FusionAttention from fusion_base import Fusion @@ -51,8 +50,8 @@ def create_mha_node( past_v: str = "", present_k: str = "", present_v: str = "", - scale: Optional[float] = None, - ) -> Union[NodeProto, None]: + scale: float | None = None, + ) -> NodeProto | None: assert self.num_heads > 0 if self.hidden_size > 0 and (self.hidden_size % self.num_heads) != 0: @@ -1131,7 +1130,7 @@ def reassign_extra_outputs(self, rot_emb_node: NodeProto, function: FunctionProt extra_initializers.append(constant_tensorproto.name) # Update references of Constant node outputs to initializer references - for extra_output, extra_initializer in zip(extra_outputs, extra_initializers): + for extra_output, extra_initializer in zip(extra_outputs, extra_initializers, strict=False): nodes_to_update = list(filter(lambda entry: extra_output in entry.input, self.model.model.graph.node)) for node_to_update in nodes_to_update: OnnxModel.replace_node_input(node_to_update, extra_output, extra_initializer) diff --git a/onnxruntime/python/tools/transformers/fusion_shape.py b/onnxruntime/python/tools/transformers/fusion_shape.py index dfa77fc7d0221..18a8fda6a67b1 100644 --- a/onnxruntime/python/tools/transformers/fusion_shape.py +++ b/onnxruntime/python/tools/transformers/fusion_shape.py @@ -4,7 +4,6 @@ # -------------------------------------------------------------------------- from logging import getLogger -from typing import Dict, List, Union from fusion_base import Fusion from fusion_utils import FusionUtils @@ -22,13 +21,13 @@ def __init__(self, model: OnnxModel): self.shape_infer = None self.shape_infer_done = False - def get_dimensions_from_tensor_proto(self, tensor_proto: TensorProto) -> Union[int, None]: + def get_dimensions_from_tensor_proto(self, tensor_proto: TensorProto) -> int | None: if tensor_proto.type.tensor_type.HasField("shape"): return len(tensor_proto.type.tensor_type.shape.dim) else: return None - def get_dimensions(self, input_name: str) -> Union[int, None]: + def get_dimensions(self, input_name: str) -> int | None: shape = self.model.get_shape(input_name) if shape is not None: return len(shape) @@ -45,8 +44,8 @@ def get_dimensions(self, input_name: str) -> Union[int, None]: def fuse( self, concat_node: NodeProto, - input_name_to_nodes: Dict[str, List[NodeProto]], - output_name_to_node: Dict[str, NodeProto], + input_name_to_nodes: dict[str, list[NodeProto]], + output_name_to_node: dict[str, NodeProto], ): # # Simplify subgraph like diff --git a/onnxruntime/python/tools/transformers/fusion_simplified_layernorm.py b/onnxruntime/python/tools/transformers/fusion_simplified_layernorm.py index a872b8c2075bc..a0eff081675fe 100644 --- a/onnxruntime/python/tools/transformers/fusion_simplified_layernorm.py +++ b/onnxruntime/python/tools/transformers/fusion_simplified_layernorm.py @@ -1,5 +1,4 @@ import logging -from typing import Dict from fusion_base import Fusion from fusion_skiplayernorm import FusionSkipLayerNormalization @@ -13,139 +12,118 @@ class FusionSimplifiedLayerNormalization(Fusion): def __init__(self, model: OnnxModel): super().__init__(model, "SimplifiedLayerNormalization", "Mul") - def fuse(self, node, input_name_to_nodes: Dict, output_name_to_node: Dict): + def fuse(self, node, input_name_to_nodes: dict, output_name_to_node: dict): if node.op_type != "Mul": return sim_ln_nodes = None - # SimplifiedLayerNorm calculation (notation from 
https://onnx.ai/onnx/operators/onnx__LayerNormalization.html#summary): - # DD = Pow(D, 2) - # Var = ReduceMean(DD) - # VarEps = Add(Var, epsilon) - # StdDev = Sqrt(VarEps) - # InvStdDev = Div(1, StdDev) - # Normalized = Mul(D, InvStdDev) - # NormalizedScaled = Mul(Normalized, Scale) - - # SimplifiedLayerNorm - # +-------------------------------------------------------+ - # | | - # Add --> Pow --> ReduceMean --> Add --> Sqrt --> Div --> Mul --> Mul - # | - # node - sim_ln_nodes_1 = self.model.match_parent_path( - node, - ["Mul", "Div", "Sqrt", "Add", "ReduceMean", "Pow", "Add"], - [1, 1, 1, 0, 0, 0, 0], - ) - # SimplifiedLayerNorm - # +-------------------------------------------------------+ - # | | - # Gather --> Pow --> ReduceMean --> Add --> Sqrt --> Div --> Mul --> Mul - # | - # node - sim_ln_nodes_2 = self.model.match_parent_path( - node, - ["Mul", "Div", "Sqrt", "Add", "ReduceMean", "Pow", "Gather"], - [1, 1, 1, 0, 0, 0, 0], - ) - - # For LLaMA from Microsoft custom export: - # sim_ln_nodes_3 uses a different start parent index than sim_ln_nodes_1 + # RMSNorm formula: + # S = Pow(X, 2) or S = Mul(X, X) + # MS = ReduceMean(S) + # MSEps = Add(MS, epsilon) + # RMS = Sqrt(MSEps) + # InvRMS = Div(1, RMS) or InvRMS = Reciprocal(RMS) + # Normalized = Mul(X, InvRMS) + # Y = Mul(Normalized, Scale) # - # SimplifiedLayerNorm - # +-------------------------------------------------------+ - # | | - # Add --> Pow --> ReduceMean --> Add --> Sqrt --> Div --> Mul --> Mul - # | - # node - sim_ln_nodes_3 = self.model.match_parent_path( - node, - ["Mul", "Div", "Sqrt", "Add", "ReduceMean", "Pow", "Add"], - [0, 1, 1, 0, 0, 0, 0], - ) - - # sim_ln_nodes_4 starts with a graph input instead of an Add node like sim_ln_nodes_3 + # (root_input) ----------------------------------------+ + # | | + # v v + # Pow --> ReduceMean --> Add ---> Sqrt --> Div --> Mul --> Mul (node) + # (B=2) (A/B=eps) (A=1) (A/B=scale) # - # SimplifiedLayerNorm - # +-----------------------------------------------+ - # | | - # graph_input --> Pow --> ReduceMean --> Add --> Sqrt --> Div --> Mul - # | - # node - sim_ln_nodes_4 = self.model.match_parent_path( - node, - ["Mul", "Div", "Sqrt", "Add", "ReduceMean", "Pow"], - [0, 1, 1, 0, 0, 0], - ) - - # For Gemma from Microsoft custom export, which has a Multiply after the Gather: + # (root_input) ----------------------------------------+ + # | | | + # v v v + # Mul --> ReduceMean --> Add ---> Sqrt --> Div --> Mul --> Mul (node) + # (B=2) (A/B=eps) (A=1) (A/B=scale) # - # SimplifiedLayerNorm - # +-------------------------------------------------------+ - # | | - # Mul --> Pow --> ReduceMean --> Add --> Sqrt --> Div --> Mul --> Mul - # | - # node - sim_ln_nodes_5 = self.model.match_parent_path( + return_indice = [] + sim_ln_nodes = self.model.match_parent_path( node, - ["Mul", "Div", "Sqrt", "Add", "ReduceMean", "Pow", "Mul"], - [1, 1, 1, 0, 0, 0, 0], + ["Mul", "Div", "Sqrt", "Add", "ReduceMean"], + [None, 1, 1, 0, None], + output_name_to_node=output_name_to_node, + return_indice=return_indice, ) - add_node, pow_node = None, None - if sim_ln_nodes_1 is not None: - sim_ln_nodes = sim_ln_nodes_1 - add_node = sim_ln_nodes[3] - pow_node = sim_ln_nodes[-2] - elif sim_ln_nodes_2 is not None: - sim_ln_nodes = sim_ln_nodes_2 - add_node = sim_ln_nodes[3] - pow_node = sim_ln_nodes[-2] - elif sim_ln_nodes_3 is not None: - sim_ln_nodes = sim_ln_nodes_3 - add_node = sim_ln_nodes[3] - pow_node = sim_ln_nodes[-2] - elif sim_ln_nodes_4 is not None: - sim_ln_nodes = sim_ln_nodes_4 - add_node = sim_ln_nodes[3] -
pow_node = sim_ln_nodes[-1] - # Verify that parent input to Pow node is graph_input - if pow_node.input[0] not in self.model.get_graphs_input_names(): + if sim_ln_nodes: + mul_node, div_node, _sqrt_node, add_node, reduce_mean_node = sim_ln_nodes + if not self.model.has_constant_input(div_node, 1.0): return - elif sim_ln_nodes_5 is not None: - sim_ln_nodes = sim_ln_nodes_5 - add_node = sim_ln_nodes[3] - pow_node = sim_ln_nodes[-2] else: + # Div(1, RMS) can also be represented as Reciprocal(RMS) like + # + # (root_input) -----------------------------------------------+ + # | | + # v v + # Pow --> ReduceMean --> Add ---> Sqrt --> Reciprocal --> Mul --> Mul (node) + # (B=2) (A/B=eps) (A/B=scale) + # + # (root_input) -----------------------------------------------+ + # | | | + # v v v + # Mul --> ReduceMean --> Add ---> Sqrt --> Reciprocal --> Mul --> Mul (node) + # (B=2) (A/B=eps) (A/B=scale) + # + sim_ln_nodes = self.model.match_parent_path( + node, + ["Mul", "Reciprocal", "Sqrt", "Add", "ReduceMean"], + [None, 1, 0, 0, None], + output_name_to_node=output_name_to_node, + return_indice=return_indice, + ) + if sim_ln_nodes is None: + return + mul_node, _reciprocal_node, _sqrt_node, add_node, reduce_mean_node = sim_ln_nodes + + pow_or_mul_node = self.model.get_parent(reduce_mean_node, 0, output_name_to_node) + if pow_or_mul_node is None or pow_or_mul_node.op_type not in ["Pow", "Mul"]: return - layernorm_weight_index = 1 if sim_ln_nodes in (sim_ln_nodes_3, sim_ln_nodes_4) else 0 - starts_with_graph_input = sim_ln_nodes == sim_ln_nodes_4 + if pow_or_mul_node.op_type == "Pow": + if self.model.find_constant_input(pow_or_mul_node, 2.0) != 1: + return + else: + assert pow_or_mul_node.op_type == "Mul" + if pow_or_mul_node.input[0] != pow_or_mul_node.input[1]: + return + + root_input = pow_or_mul_node.input[0] + if root_input != mul_node.input[0]: + return - if self.model.find_constant_input(pow_node, 2.0) != 1: + _i, epsilon = self.model.get_constant_input(add_node) + if epsilon is None or epsilon <= 0 or epsilon > 1.0e-4: + logger.warning(f"epsilon value is not expected: {epsilon}") return - root_input = pow_node.input[0] - if root_input != sim_ln_nodes[0].input[0]: + # ReduceMean must have keepdims == 1 + keepdims = self.model.get_node_attribute(reduce_mean_node, "keepdims") + if not keepdims: return - i, add_weight = self.model.get_constant_input(add_node) - if add_weight is None or add_weight <= 0 or add_weight > 1.0e-4: - logger.warning(f"epsilon value is not expected: {add_weight}") + # ReduceMean axes must refer only to the last dimension. + # Axes became an input in opset 18. Before then, axes was an attribute. + axes = self.model.get_node_attribute(reduce_mean_node, "axes") + if (not axes) and len(reduce_mean_node.input) > 1: + axes = self.model.get_constant_value(reduce_mean_node.input[1]) + # Make sure there is only one axis, as required by the SimplifiedLayerNormalization spec.
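+ # That single axis (typically -1, the hidden dimension) is propagated to the 'axis' attribute of the fused node below.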
+ if not axes or len(axes) != 1: return - self.nodes_to_remove.extend(sim_ln_nodes[:-1] if not starts_with_graph_input else sim_ln_nodes) + self.nodes_to_remove.extend(sim_ln_nodes) + self.nodes_to_remove.append(pow_or_mul_node) self.nodes_to_remove.append(node) normalize_node = helper.make_node( "SimplifiedLayerNormalization", - inputs=[root_input, node.input[layernorm_weight_index]], + inputs=[root_input, node.input[1 - return_indice[0]]], outputs=[node.output[0]], - name=self.model.create_node_name("SimplifiedLayerNormalization", name_prefix="LayerNorm"), + name=self.model.create_node_name("SimplifiedLayerNormalization", name_prefix="RMSNorm"), ) - normalize_node.attribute.extend([helper.make_attribute("epsilon", float(add_weight))]) - normalize_node.attribute.extend([helper.make_attribute("axis", -1)]) + normalize_node.attribute.extend([helper.make_attribute("epsilon", float(epsilon))]) + normalize_node.attribute.extend([helper.make_attribute("axis", axes[0])]) normalize_node.attribute.extend([helper.make_attribute("stash_type", 1)]) self.nodes_to_add.append(normalize_node) self.node_name_to_graph_name[normalize_node.name] = self.this_graph_name diff --git a/onnxruntime/python/tools/transformers/fusion_skip_group_norm.py b/onnxruntime/python/tools/transformers/fusion_skip_group_norm.py index 676052f747967..b2b3af38253c2 100644 --- a/onnxruntime/python/tools/transformers/fusion_skip_group_norm.py +++ b/onnxruntime/python/tools/transformers/fusion_skip_group_norm.py @@ -3,7 +3,6 @@ # Licensed under the MIT License. # -------------------------------------------------------------------------- from logging import getLogger -from typing import List from fusion_base import Fusion from fusion_utils import NumpyHelper @@ -26,7 +25,7 @@ def __init__(self, model: OnnxModel): if self.shape_infer_helper is None: logger.warning("SkipGroupNorm fusion will be skipped since symbolic shape inference disabled or failed.") - def create_transpose_node(self, input_name: str, perm: List[int], output_name=None): + def create_transpose_node(self, input_name: str, perm: list[int], output_name=None): """Append a Transpose node after an input""" node_name = self.model.create_node_name("Transpose") if output_name is None: diff --git a/onnxruntime/python/tools/transformers/fusion_transpose.py b/onnxruntime/python/tools/transformers/fusion_transpose.py index ca699903a7cd9..d38fcffb2af0d 100644 --- a/onnxruntime/python/tools/transformers/fusion_transpose.py +++ b/onnxruntime/python/tools/transformers/fusion_transpose.py @@ -4,7 +4,6 @@ # -------------------------------------------------------------------------- from logging import getLogger -from typing import Dict, List from fusion_base import Fusion from fusion_utils import FusionUtils @@ -21,8 +20,8 @@ def __init__(self, model: OnnxModel): def fuse( self, transpose_node: NodeProto, - input_name_to_nodes: Dict[str, List[NodeProto]], - output_name_to_node: Dict[str, NodeProto], + input_name_to_nodes: dict[str, list[NodeProto]], + output_name_to_node: dict[str, NodeProto], ): """ Note that onnxruntime will do comprehensive transpose optimization after loading model. 
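The RMSNorm pattern matched by the rewritten FusionSimplifiedLayerNormalization above can be summarized with a minimal NumPy sketch (the epsilon and shapes here are illustrative assumptions, not values from the diff):

import numpy as np

def rms_norm(x: np.ndarray, scale: np.ndarray, epsilon: float = 1e-6) -> np.ndarray:
    # S = Pow(X, 2); MS = ReduceMean(S) over the last axis with keepdims=1
    ms = (x * x).mean(axis=-1, keepdims=True)
    # RMS = Sqrt(MS + epsilon); InvRMS = Div(1, RMS) or Reciprocal(RMS)
    inv_rms = 1.0 / np.sqrt(ms + epsilon)
    # Y = Mul(Mul(X, InvRMS), Scale)
    return x * inv_rms * scale

y = rms_norm(np.random.rand(2, 4, 8).astype(np.float32), scale=np.ones(8, dtype=np.float32))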
@@ -90,7 +89,7 @@ class FusionInsertTranspose(Fusion): def __init__(self, model: OnnxModel): super().__init__(model, "", "GroupNorm") - def create_transpose_node(self, input_name: str, perm: List[int], output_name=None): + def create_transpose_node(self, input_name: str, perm: list[int], output_name=None): """Append a Transpose node after an input""" node_name = self.model.create_node_name("Transpose") if output_name is None: @@ -102,8 +101,8 @@ def create_transpose_node(self, input_name: str, perm: List[int], output_name=No def fuse( self, group_norm_node: NodeProto, - input_name_to_nodes: Dict[str, List[NodeProto]], - output_name_to_node: Dict[str, NodeProto], + input_name_to_nodes: dict[str, list[NodeProto]], + output_name_to_node: dict[str, NodeProto], ): """ This optimization will insert an Transpose, and onnxruntime transpose optimizer will remove it together with diff --git a/onnxruntime/python/tools/transformers/fusion_utils.py b/onnxruntime/python/tools/transformers/fusion_utils.py index dbd9e828198ca..5343c77adb97a 100644 --- a/onnxruntime/python/tools/transformers/fusion_utils.py +++ b/onnxruntime/python/tools/transformers/fusion_utils.py @@ -3,7 +3,6 @@ # Licensed under the MIT License. # -------------------------------------------------------------------------- from logging import getLogger -from typing import Optional, Tuple import numpy from numpy import array_equal, ndarray @@ -18,7 +17,7 @@ class FusionUtils: def __init__(self, model: OnnxModel): self.model: OnnxModel = model - def cast_graph_input_to_int32(self, input_name: str) -> Tuple[bool, str]: + def cast_graph_input_to_int32(self, input_name: str) -> tuple[bool, str]: graph_input = self.model.find_graph_input(input_name) if graph_input is not None and graph_input.type.tensor_type.elem_type != TensorProto.INT32: cast_output, cast_node = self.cast_input_to_int32(input_name) @@ -48,9 +47,9 @@ def add_cast_node( self, input_name: str, to_type: int, - output_name: Optional[str] = None, + output_name: str | None = None, output_name_to_node=None, - graph_name: Optional[str] = None, + graph_name: str | None = None, ): if output_name is None: output_name = input_name + f"_cast_to_{to_type}" @@ -127,6 +126,19 @@ def skip_parent(model: OnnxModel, node, parent_node, input_name_to_nodes, node_i return parent_can_be_removed + def get_squeeze_or_unsqueeze_axes(self, node: NodeProto) -> ndarray | None: + assert node.op_type in ["Squeeze", "Unsqueeze"] + + # For opset >= 13, axes is an input instead of an attribute. + if len(node.input) > 1: + return self.model.get_constant_value(node.input[1]) + + axes = None + for attr in node.attribute: + if attr.name == "axes": + axes = helper.get_attribute_value(attr) + return axes + @staticmethod def check_node_attribute(node, attribute_name: str, expected_value, default_value=None): """Verify that a node has expected value for an attribute. 
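For context on the new FusionUtils.get_squeeze_or_unsqueeze_axes helper above, a small sketch of the two Unsqueeze representations it handles (the node and initializer names are illustrative):

import numpy as np
from onnx import helper, numpy_helper

# Opset < 13: axes is an attribute on the node.
unsqueeze_old = helper.make_node("Unsqueeze", inputs=["x"], outputs=["y"], axes=[2])

# Opset >= 13: axes is a second input, usually a constant initializer.
axes_init = numpy_helper.from_array(np.array([2], dtype=np.int64), name="unsqueeze_axes")
unsqueeze_new = helper.make_node("Unsqueeze", inputs=["x", "unsqueeze_axes"], outputs=["y"])

The helper returns [2] in both cases: read directly from the attribute in the first, and via get_constant_value on the second input in the other.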
diff --git a/onnxruntime/python/tools/transformers/io_binding_helper.py b/onnxruntime/python/tools/transformers/io_binding_helper.py index 0fa038d5cfc62..5870a031086ee 100644 --- a/onnxruntime/python/tools/transformers/io_binding_helper.py +++ b/onnxruntime/python/tools/transformers/io_binding_helper.py @@ -1,7 +1,8 @@ import copy import logging from collections import OrderedDict -from typing import Any, Dict, List, Mapping, Optional, Tuple, Union +from collections.abc import Mapping +from typing import Any import numpy import torch @@ -9,7 +10,7 @@ from onnxruntime import InferenceSession, RunOptions # Type alias -ShapeDict = Mapping[str, Union[Tuple, List[int]]] +ShapeDict = Mapping[str, tuple | list[int]] logger = logging.getLogger(__name__) @@ -88,7 +89,7 @@ def torch_type_to_numpy_type(torch_type: torch.dtype): return torch_type_to_numpy_type_map[torch_type] @staticmethod - def get_io_numpy_type_map(ort_session: InferenceSession) -> Dict[str, numpy.dtype]: + def get_io_numpy_type_map(ort_session: InferenceSession) -> dict[str, numpy.dtype]: """Create a mapping from input/output name to numpy data type""" name_to_numpy_type = {} for input in ort_session.get_inputs(): @@ -116,7 +117,7 @@ def prepare_io_binding( input_ids: torch.Tensor, position_ids: torch.Tensor, attention_mask: torch.Tensor, - past: List[torch.Tensor], + past: list[torch.Tensor], output_buffers, output_shapes, name_to_np_type=None, @@ -228,7 +229,7 @@ def __init__(self, ort_session: InferenceSession, device: torch.device, enable_c self.device = device # Pairs of input and output names that share the same buffer. - self.buffer_sharing: Dict[str, str] = {} + self.buffer_sharing: dict[str, str] = {} def set_buffer_sharing(self, input_name: str, output_name: str): assert input_name in self.input_names @@ -307,7 +308,7 @@ def allocate_buffers(self, shape_dict: ShapeDict): tensor.data_ptr(), ) - def infer(self, feed_dict: Dict[str, torch.Tensor], run_options: RunOptions = None, synchronize: bool = True): + def infer(self, feed_dict: dict[str, torch.Tensor], run_options: RunOptions = None, synchronize: bool = True): """Bind input tensors and run inference""" for name, tensor in feed_dict.items(): assert isinstance(tensor, torch.Tensor) and tensor.is_contiguous() @@ -330,7 +331,7 @@ def infer(self, feed_dict: Dict[str, torch.Tensor], run_options: RunOptions = No return self.output_tensors @staticmethod - def get_cuda_provider_options(device_id: int, enable_cuda_graph: bool, stream: int = 0) -> Dict[str, Any]: + def get_cuda_provider_options(device_id: int, enable_cuda_graph: bool, stream: int = 0) -> dict[str, Any]: options = { "device_id": device_id, "arena_extend_strategy": "kSameAsRequested", @@ -353,7 +354,7 @@ def __init__( enable_gpu_graph: bool = False, gpu_graph_id: int = -1, stream: int = 0, - buffer_sharing: Optional[Dict[str, str]] = None, + buffer_sharing: dict[str, str] | None = None, ): super().__init__(ort_session, device, enable_gpu_graph) if buffer_sharing: @@ -379,7 +380,7 @@ def get_run_options(self, disable_cuda_graph_in_run: bool = False) -> RunOptions return options - def infer(self, feed_dict: Dict[str, torch.Tensor], disable_cuda_graph_in_run: bool = False): + def infer(self, feed_dict: dict[str, torch.Tensor], disable_cuda_graph_in_run: bool = False): run_options = self.get_run_options(disable_cuda_graph_in_run) if self.stream: @@ -411,7 +412,7 @@ def get_binding( self, shape_dict: ShapeDict, use_cuda_graph: bool = False, - buffer_sharing: Optional[Dict[str, str]] = None, + buffer_sharing: dict[str, 
str] | None = None, ) -> GpuBinding: for gpu_graph_binding in self.graph_bindings: # Found a cuda graph that captured with the same shape diff --git a/onnxruntime/python/tools/transformers/large_model_exporter.py b/onnxruntime/python/tools/transformers/large_model_exporter.py index 0eaccc0fafcc4..29829a6c475d9 100644 --- a/onnxruntime/python/tools/transformers/large_model_exporter.py +++ b/onnxruntime/python/tools/transformers/large_model_exporter.py @@ -6,13 +6,13 @@ """ Export LLM to onnx """ + import argparse import inspect import math import os import tempfile from pathlib import Path -from typing import Optional import onnx import torch @@ -49,7 +49,7 @@ def get_model_parameter_size(model: nn.Module): return all_size -def initialize_model_and_sample_inputs(hf_model: str, cache_dir: Optional[str], tokenizer=None): +def initialize_model_and_sample_inputs(hf_model: str, cache_dir: str | None, tokenizer=None): """ get the pretrained torch model from hugginface, and sample model-inputs @@ -154,7 +154,7 @@ def hook_for_inputs(_, inputs, kwargs): for key, value in user_inputs[1].items(): idx = input_keys.index(key) onnx_inputs[idx] = value - for idx, (key, value) in enumerate(zip(input_keys, onnx_inputs)): + for idx, (key, value) in enumerate(zip(input_keys, onnx_inputs, strict=False)): if type(value) is torch.Tensor: value.to(model.device) if "use_cache" in key: @@ -173,8 +173,8 @@ def move_to_appropriate_device(model: nn.Module, sample_inputs_tp: tuple) -> nn. """ total_mem_per_cpu = torch.cuda.get_device_properties(0).total_memory / 1024 / 1024 - print(f"Model_Size = {get_model_parameter_size(model)/1024} GB") - print(f"total_mem_per_cpu = {total_mem_per_cpu/1024} GB") + print(f"Model_Size = {get_model_parameter_size(model) / 1024} GB") + print(f"total_mem_per_cpu = {total_mem_per_cpu / 1024} GB") if get_model_parameter_size(model) > total_mem_per_cpu * 0.45: device_collection = [torch.device(i) for i in range(torch.cuda.device_count())] if len(device_collection) > 1: @@ -228,9 +228,9 @@ def fetch_onnx_inputs_outputs_name( onnx_inp_names = tuple( [torch_input_names[i] for i in range(len(torch_input_names)) if isinstance(onnx_inputs[i], torch.Tensor)] ) - assert ( - "input_ids" in onnx_inp_names and "attention_mask" in onnx_inp_names - ), "input_ids and attention_mask must be existed in inputs" + assert "input_ids" in onnx_inp_names and "attention_mask" in onnx_inp_names, ( + "input_ids and attention_mask must exist in the inputs" + ) onnx_out_names = ("logits",) onnx_dynamic_axes = { "input_ids": {0: "batch_size", 1: "seq_len"}, @@ -308,7 +308,7 @@ def do_export_internal(model: nn.Module, onnx_io_tuple: tuple, onnx_inputs: tupl @torch.no_grad() -def export_onnx(hf_model: str, cache_dir: Optional[str], onnx_path_str: str, with_past: bool, opset: int): +def export_onnx(hf_model: str, cache_dir: str | None, onnx_path_str: str, with_past: bool, opset: int): """ do export model: torch model diff --git a/onnxruntime/python/tools/transformers/machine_info.py b/onnxruntime/python/tools/transformers/machine_info.py index d4194abbd14d3..7f9a0110bcd9f 100644 --- a/onnxruntime/python/tools/transformers/machine_info.py +++ b/onnxruntime/python/tools/transformers/machine_info.py @@ -10,7 +10,6 @@ import logging import platform from os import environ -from typing import Dict, List import cpuinfo import psutil @@ -66,12 +65,12 @@ def get_machine_info(self): } return machine_info - def get_memory_info(self) -> Dict: + def get_memory_info(self) -> dict: """Get memory info""" mem = psutil.virtual_memory()
return {"total": mem.total, "available": mem.available} - def _try_get(self, cpu_info: Dict, names: List) -> str: + def _try_get(self, cpu_info: dict, names: list) -> str: for name in names: if name in cpu_info: value = cpu_info[name] @@ -80,7 +79,7 @@ def _try_get(self, cpu_info: Dict, names: List) -> str: return value return "" - def get_cpu_info(self) -> Dict: + def get_cpu_info(self) -> dict: """Get CPU info""" cpu_info = cpuinfo.get_cpu_info() @@ -94,7 +93,7 @@ def get_cpu_info(self) -> Dict: "processor": platform.uname().processor, } - def get_gpu_info_by_nvml(self) -> Dict: + def get_gpu_info_by_nvml(self) -> dict: """Get GPU info using nvml""" gpu_info_list = [] driver_version = None @@ -122,7 +121,7 @@ def get_gpu_info_by_nvml(self) -> Dict: result["cuda_visible"] = environ["CUDA_VISIBLE_DEVICES"] return result - def get_related_packages(self) -> List[str]: + def get_related_packages(self) -> list[str]: import pkg_resources installed_packages = pkg_resources.working_set @@ -142,7 +141,7 @@ def get_related_packages(self) -> List[str]: related_packages_list = {i.key: i.version for i in installed_packages if i.key in related_packages} return related_packages_list - def get_onnxruntime_info(self) -> Dict: + def get_onnxruntime_info(self) -> dict: try: import onnxruntime @@ -159,7 +158,7 @@ def get_onnxruntime_info(self) -> Dict: self.logger.exception(exception, False) return None - def get_pytorch_info(self) -> Dict: + def get_pytorch_info(self) -> dict: try: import torch @@ -177,7 +176,7 @@ def get_pytorch_info(self) -> Dict: self.logger.exception(exception, False) return None - def get_tensorflow_info(self) -> Dict: + def get_tensorflow_info(self) -> dict: try: import tensorflow as tf diff --git a/onnxruntime/python/tools/transformers/metrics.py b/onnxruntime/python/tools/transformers/metrics.py index 282c75ba8f6a5..74a34df28c019 100644 --- a/onnxruntime/python/tools/transformers/metrics.py +++ b/onnxruntime/python/tools/transformers/metrics.py @@ -6,7 +6,6 @@ import datetime import json -from typing import Optional import pandas as pd @@ -30,10 +29,10 @@ def to_dict(self): class ModelInfo(BaseObject): def __init__( self, - full_name: Optional[str] = None, - is_huggingface: Optional[bool] = False, - is_text_generation: Optional[bool] = False, - short_name: Optional[str] = None, + full_name: str | None = None, + is_huggingface: bool | None = False, + is_text_generation: bool | None = False, + short_name: str | None = None, ): super().__init__() self.full_name = full_name @@ -46,9 +45,9 @@ def __init__( class BackendOptions(BaseObject): def __init__( self, - enable_profiling: Optional[bool] = False, - execution_provider: Optional[str] = None, - use_io_binding: Optional[bool] = False, + enable_profiling: bool | None = False, + execution_provider: str | None = None, + use_io_binding: bool | None = False, ): super().__init__() self.enable_profiling = enable_profiling @@ -59,12 +58,12 @@ def __init__( class Config(BaseObject): def __init__( self, - backend: Optional[str] = "onnxruntime", - batch_size: Optional[int] = 1, - seq_length: Optional[int] = 0, - precision: Optional[str] = "fp32", - warmup_runs: Optional[int] = 1, - measured_runs: Optional[int] = 10, + backend: str | None = "onnxruntime", + batch_size: int | None = 1, + seq_length: int | None = 0, + precision: str | None = "fp32", + warmup_runs: int | None = 1, + measured_runs: int | None = 10, ): super().__init__() self.backend = backend @@ -80,11 +79,11 @@ def __init__( class Metadata(BaseObject): def __init__( self, - device: 
Optional[str] = None, - package_name: Optional[str] = None, - package_version: Optional[str] = None, - platform: Optional[str] = None, - python_version: Optional[str] = None, + device: str | None = None, + package_name: str | None = None, + package_version: str | None = None, + platform: str | None = None, + python_version: str | None = None, ): super().__init__() self.device = device @@ -97,9 +96,9 @@ def __init__( class Metrics(BaseObject): def __init__( self, - latency_ms_mean: Optional[float] = 0.0, - throughput_qps: Optional[float] = 0.0, - max_memory_usage_GB: Optional[float] = 0.0, + latency_ms_mean: float | None = 0.0, + throughput_qps: float | None = 0.0, + max_memory_usage_GB: float | None = 0.0, ): super().__init__() self.latency_ms_mean = latency_ms_mean @@ -116,10 +115,10 @@ def __init__( device: str, package_name: str, package_version: str, - batch_size: Optional[int] = 1, - warmup_runs: Optional[int] = 1, - measured_runs: Optional[int] = 10, - trigger_date: Optional[str] = None, + batch_size: int | None = 1, + warmup_runs: int | None = 1, + measured_runs: int | None = 10, + trigger_date: str | None = None, ): self.config = Config() self.metrics = Metrics() diff --git a/onnxruntime/python/tools/transformers/models/bart/utils/export_helper.py b/onnxruntime/python/tools/transformers/models/bart/utils/export_helper.py index 8b7c18dbde7d9..85d2fa9a64e23 100644 --- a/onnxruntime/python/tools/transformers/models/bart/utils/export_helper.py +++ b/onnxruntime/python/tools/transformers/models/bart/utils/export_helper.py @@ -4,13 +4,12 @@ # license information. # -------------------------------------------------------------------------- -from typing import List, Tuple import torch from transformers import BartConfig, BartForConditionalGeneration, BartTokenizer -def group_by_self_and_cross(present_key_values: Tuple[torch.Tensor], concat: bool = False): +def group_by_self_and_cross(present_key_values: tuple[torch.Tensor], concat: bool = False): """Categorize present_key_values into self and cross attention. Split present state from grouped by layer to grouped by self/cross attention. @@ -27,8 +26,8 @@ def group_by_self_and_cross(present_key_values: Tuple[torch.Tensor], concat: boo present_self (Tuple[torch.Tensor]): present key and values from self attention present_cross (Tuple[torch.Tensor]): present key and values from cross attention """ - present_self: List[torch.Tensor] = [] - present_cross: List[torch.Tensor] = [] + present_self: list[torch.Tensor] = [] + present_cross: list[torch.Tensor] = [] for _, present_layer_i in enumerate(present_key_values): assert len(present_layer_i) == 4, f"Expected to have four items. Got {len(present_layer_i)}" present_key_self, present_value_self, present_key_cross, present_value_cross = present_layer_i @@ -40,7 +39,7 @@ def group_by_self_and_cross(present_key_values: Tuple[torch.Tensor], concat: boo return present_self, present_cross -def back_group_by_layer(past_key_values: Tuple[Tuple[torch.Tensor]]): +def back_group_by_layer(past_key_values: tuple[tuple[torch.Tensor]]): """Categorize present_key_values from self and cross attention to layer by layer. Reorder past state from grouped by self/cross attention to grouped by layer. @@ -70,7 +69,7 @@ def back_group_by_layer(past_key_values: Tuple[Tuple[torch.Tensor]]): return past_tuples -def get_input_names(past_key_values: Tuple[Tuple[torch.Tensor]], encoder=True): +def get_input_names(past_key_values: tuple[tuple[torch.Tensor]], encoder=True): """Process input names of model wrapper. 
Args: @@ -89,7 +88,7 @@ def get_input_names(past_key_values: Tuple[Tuple[torch.Tensor]], encoder=True): return names -def get_output_names(past_key_values: Tuple[torch.Tensor]): +def get_output_names(past_key_values: tuple[torch.Tensor]): """Process output names of model wrapper. As cross attention is unchanged during every iteration of beam search, diff --git a/onnxruntime/python/tools/transformers/models/bart/utils/export_summarization_edinit.py b/onnxruntime/python/tools/transformers/models/bart/utils/export_summarization_edinit.py index 8a610fb17671b..f8d13ca041349 100644 --- a/onnxruntime/python/tools/transformers/models/bart/utils/export_summarization_edinit.py +++ b/onnxruntime/python/tools/transformers/models/bart/utils/export_summarization_edinit.py @@ -6,7 +6,7 @@ import os import time -from typing import Any, Dict, Optional +from typing import Any import torch from transformers import BartConfig, BartForConditionalGeneration, file_utils @@ -87,8 +87,8 @@ def _create_encoder_export(args, config: BartConfig): """ def _prepare_encoder_decoder_kwargs_for_generation( - self, input_ids: torch.Tensor, model_kwargs, model_input_name: Optional[str] = None - ) -> Dict[str, Any]: + self, input_ids: torch.Tensor, model_kwargs, model_input_name: str | None = None + ) -> dict[str, Any]: # retrieve encoder hidden states # 1. get encoder encoder = self.get_encoder() diff --git a/onnxruntime/python/tools/transformers/models/bart/utils/export_summarization_enc_dec_past.py b/onnxruntime/python/tools/transformers/models/bart/utils/export_summarization_enc_dec_past.py index afd01ae9d025f..475e4c5aecd18 100644 --- a/onnxruntime/python/tools/transformers/models/bart/utils/export_summarization_enc_dec_past.py +++ b/onnxruntime/python/tools/transformers/models/bart/utils/export_summarization_enc_dec_past.py @@ -208,7 +208,7 @@ def forward(self, input_ids: torch.Tensor, attention_mask: torch.Tensor, **kwarg # Test the generated model with onnxruntime print("========== ORT inference test on Decoder ... 
==========") - ort_inputs = {name: value.cpu().numpy() for name, value in zip(input_names, inputs)} + ort_inputs = {name: value.cpu().numpy() for name, value in zip(input_names, inputs, strict=False)} # NOTE: encoder_hidden_states is not used and deleted ort_inputs.pop("encoder_hidden_states") sess_options = SessionOptions() @@ -216,7 +216,7 @@ def forward(self, input_ids: torch.Tensor, attention_mask: torch.Tensor, **kwarg sess = InferenceSession(onnx_model_path, sess_options, providers=["CPUExecutionProvider"]) out = sess.run(None, ort_inputs) - for ort_out, torch_out in zip(out, [logits, *present]): + for ort_out, torch_out in zip(out, [logits, *present], strict=False): torch.testing.assert_close(ort_out, torch_out.cpu().numpy(), check_dtype=True, atol=1e-4, rtol=1e-2) print("========== [SUCCESS] ORT inference test on Decoder ==========") diff --git a/onnxruntime/python/tools/transformers/models/bert/eval_squad.py b/onnxruntime/python/tools/transformers/models/bert/eval_squad.py index 8797fd9c2cfaf..680b3455ade2d 100644 --- a/onnxruntime/python/tools/transformers/models/bert/eval_squad.py +++ b/onnxruntime/python/tools/transformers/models/bert/eval_squad.py @@ -33,7 +33,7 @@ from importlib_metadata import PackageNotFoundError, version from pathlib import Path -from typing import Any, Dict, List, Optional +from typing import Any from datasets import load_dataset from evaluate import evaluator @@ -60,7 +60,7 @@ def get_package_version(package_name: str): def load_onnx_model( - model_id: str, onnx_path: Optional[str] = None, provider="CUDAExecutionProvider", use_io_binding: bool = False + model_id: str, onnx_path: str | None = None, provider="CUDAExecutionProvider", use_io_binding: bool = False ): """Load onnx model given pretrained model name and optional ONNX model path. If onnx_path is None, the default onnx model from optimum will be used. @@ -95,7 +95,7 @@ def load_onnx_model( return model, onnx_path -def output_details(results: List[Dict[str, Any]], csv_filename: str): +def output_details(results: list[dict[str, Any]], csv_filename: str): """Output a CSV file with detail of each test results. Args: @@ -136,7 +136,7 @@ def output_details(results: List[Dict[str, Any]], csv_filename: str): print(f"Detail results are saved to csv file: {csv_filename}") -def output_summary(results: List[Dict[str, Any]], csv_filename: str, metric_name: str): +def output_summary(results: list[dict[str, Any]], csv_filename: str, metric_name: str): """Output a CSV file with summary of a metric on combinations of batch_size and sequence_length. 
Args: diff --git a/onnxruntime/python/tools/transformers/models/gpt2/gpt2_helper.py b/onnxruntime/python/tools/transformers/models/gpt2/gpt2_helper.py index 9153193a4974a..b405c19b04689 100644 --- a/onnxruntime/python/tools/transformers/models/gpt2/gpt2_helper.py +++ b/onnxruntime/python/tools/transformers/models/gpt2/gpt2_helper.py @@ -12,7 +12,6 @@ import tempfile import time from pathlib import Path -from typing import Dict, List, Tuple, Union import numpy import onnx @@ -139,17 +138,17 @@ class Gpt2Inputs: def __init__(self, input_ids, position_ids, attention_mask, past): self.input_ids: torch.LongTensor = input_ids self.position_ids: torch.LongTensor = position_ids - self.attention_mask: Union[torch.LongTensor, torch.FloatTensor, torch.HalfTensor] = attention_mask - self.past: Union[List[torch.FloatTensor], List[torch.HalfTensor]] = past + self.attention_mask: torch.LongTensor | torch.FloatTensor | torch.HalfTensor = attention_mask + self.past: list[torch.FloatTensor] | list[torch.HalfTensor] = past - def to_list(self) -> List: + def to_list(self) -> list: input_list = [v for v in [self.input_ids, self.position_ids, self.attention_mask] if v is not None] if self.past: input_list.extend(self.past) return input_list - def to_tuple(self) -> Tuple: + def to_tuple(self) -> tuple: return tuple(v for v in [self.input_ids, self.position_ids, self.attention_mask, self.past] if v is not None) def to_fp32(self): @@ -241,7 +240,7 @@ def get_output_shapes( sequence_length: int, config: GPT2Config, model_class: str = "GPT2LMHeadModel", - ) -> Dict[str, List[int]]: + ) -> dict[str, list[int]]: """Returns a dictionary with output name as key, and shape as value.""" num_attention_heads = config.num_attention_heads hidden_size = config.hidden_size @@ -541,7 +540,7 @@ def optimize_onnx( @staticmethod def auto_mixed_precision( onnx_model: OnnxModel, - op_block_list: List[str] = [ # noqa: B006 + op_block_list: list[str] = [ # noqa: B006 "Add", "LayerNormalization", "SkipLayerNormalization", @@ -698,8 +697,8 @@ def get_outputs_from_io_binding_buffer(ort_session, output_buffers, output_shape def onnxruntime_inference_with_binded_io( ort_session, inputs: Gpt2Inputs, - output_buffers: Dict[str, torch.Tensor], - output_shapes: Dict[str, List[int]], + output_buffers: dict[str, torch.Tensor], + output_shapes: dict[str, list[int]], total_runs: int = 0, return_numpy: bool = True, include_copy_output_latency: bool = False, @@ -889,11 +888,11 @@ def test_parity( result["nan_rate"] = (total_test_cases - len(max_abs_diff_list)) * 1.0 / total_test_cases logger.info( - f"Parity Test Cases={total_test_cases}; Passed={passed_test_cases}; Nan={total_test_cases-len(max_abs_diff_list)}; Top1_Matched={top1_matched_cases}" + f"Parity Test Cases={total_test_cases}; Passed={passed_test_cases}; Nan={total_test_cases - len(max_abs_diff_list)}; Top1_Matched={top1_matched_cases}" ) if passed_test_cases > 0.95 * total_test_cases: - logger.info(f"Parity is good: passed rate={int(passed_test_cases*100/total_test_cases):.0f}%") + logger.info(f"Parity is good: passed rate={int(passed_test_cases * 100 / total_test_cases):.0f}%") return result diff --git a/onnxruntime/python/tools/transformers/models/gpt2/gpt2_parity.py b/onnxruntime/python/tools/transformers/models/gpt2/gpt2_parity.py index b039f1351b1d0..5d002d3b5e707 100644 --- a/onnxruntime/python/tools/transformers/models/gpt2/gpt2_parity.py +++ b/onnxruntime/python/tools/transformers/models/gpt2/gpt2_parity.py @@ -366,7 +366,7 @@ def run_tuning_step0(task, fp16_baseline, all_ops, 
optimized_ops): # Only weights in FP16 task.run( - fp16_baseline + fp32_io + ["--op_block_list"] + [o for o in all_ops] + ["--force_fp16_initializers"], + fp16_baseline + fp32_io + ["--op_block_list"] + list(all_ops) + ["--force_fp16_initializers"], "FP32 except weights in FP16", ) diff --git a/onnxruntime/python/tools/transformers/models/llama/benchmark.py b/onnxruntime/python/tools/transformers/models/llama/benchmark.py index d05de369b3d51..61bfc950735af 100644 --- a/onnxruntime/python/tools/transformers/models/llama/benchmark.py +++ b/onnxruntime/python/tools/transformers/models/llama/benchmark.py @@ -642,9 +642,9 @@ def get_args(rank=0): # Check that only one (batch_size, sequence_length) combination is set for profiling if args.profile: - assert ( - len(args.batch_sizes) == 1 and len(args.sequence_lengths) == 1 - ), "Please provide only one (batch_size, sequence_length) combination for profiling" + assert len(args.batch_sizes) == 1 and len(args.sequence_lengths) == 1, ( + "Please provide only one (batch_size, sequence_length) combination for profiling" + ) return args diff --git a/onnxruntime/python/tools/transformers/models/llama/benchmark_e2e.py b/onnxruntime/python/tools/transformers/models/llama/benchmark_e2e.py index 9f6f86fc28fae..db78d837f8225 100644 --- a/onnxruntime/python/tools/transformers/models/llama/benchmark_e2e.py +++ b/onnxruntime/python/tools/transformers/models/llama/benchmark_e2e.py @@ -259,14 +259,14 @@ def get_args(): help="Use when GroupQueryAttention (GQA) is in ONNX model", ) parser.add_argument( "--anomaly-filtering", default=False, action="store_true", help="Use this flag to filter anomaly accelerator times for tokens generated. \ This may give more accurate latency and throughput metrics for tokens generated. \ Wall-clock metrics are still reported with anomaly times though.", - ), + ) parser.add_argument( "-b", diff --git a/onnxruntime/python/tools/transformers/models/llama/convert_to_onnx.py b/onnxruntime/python/tools/transformers/models/llama/convert_to_onnx.py index f5446ed718087..89fd613ecbbc2 100644 --- a/onnxruntime/python/tools/transformers/models/llama/convert_to_onnx.py +++ b/onnxruntime/python/tools/transformers/models/llama/convert_to_onnx.py @@ -427,7 +427,7 @@ def convert_to_float16(args: argparse.Namespace, old_paths: list[str], rank: int new_paths = [decoder_model_fp16_path, decoder_with_past_model_fp16_path, decoder_merged_model_fp16_path] logger.info("Converting to float16...") - for fp32_path, fp16_path in zip(old_paths, new_paths): + for fp32_path, fp16_path in zip(old_paths, new_paths, strict=False): if os.path.exists(fp32_path): model = OnnxModel(onnx.load_model(fp32_path, load_external_data=True)) model.convert_float_to_float16(keep_io_types=False) @@ -455,9 +455,8 @@ def smooth_quant( decoder_model_int8_path: str, decoder_with_past_model_int8_path: str, ): - from neural_compressor import PostTrainingQuantConfig + from neural_compressor import PostTrainingQuantConfig, set_workspace from neural_compressor import quantization as intel_quantization - from neural_compressor import set_workspace from onnx.external_data_helper import load_external_data_for_model from quant_kv_dataloader import QuantKVDataLoader @@ -868,7 +867,7 @@ def main(): # Run the optimizer script.
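+ # Note: strict=False below keeps zip's default truncating behavior explicit (PEP 618); old_paths and new_paths are expected to be the same length here.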
logger.info("Optimizing models...") - for orig_path, opt_path in zip(old_paths, new_paths): + for orig_path, opt_path in zip(old_paths, new_paths, strict=False): if os.path.exists(orig_path): optimize_export(args, l_config, input_path=orig_path, output_path=opt_path, world_size=world_size) @@ -913,7 +912,7 @@ def main(): ) logger.info("Quantizing to int8...") - for fp32_path, int8_path in zip(old_paths, new_paths): + for fp32_path, int8_path in zip(old_paths, new_paths, strict=False): if os.path.exists(fp32_path): ort_quantization.quantize_dynamic( fp32_path, @@ -953,7 +952,7 @@ def main(): ) new_paths = [decoder_model_int4_path, decoder_with_past_model_int4_path, decoder_merged_model_int4_path] - for fp_path, int4_path in zip(old_paths, new_paths): + for fp_path, int4_path in zip(old_paths, new_paths, strict=False): if os.path.exists(fp_path): model = onnx.load_model(fp_path, load_external_data=True) quant = MatMul4BitsQuantizer( diff --git a/onnxruntime/python/tools/transformers/models/llama/llama_inputs.py b/onnxruntime/python/tools/transformers/models/llama/llama_inputs.py index d8a1221277e43..025d57f0b2d5d 100644 --- a/onnxruntime/python/tools/transformers/models/llama/llama_inputs.py +++ b/onnxruntime/python/tools/transformers/models/llama/llama_inputs.py @@ -84,9 +84,7 @@ def get_sample_with_past_kv_inputs( attention_mask = attention_mask.numpy() if engine == "ort" else attention_mask.to(device) position_ids = position_ids.numpy() if engine == "ort" else position_ids.to(device) past_kv = ( - flatten_past_kv_inputs(past_kv) - if engine == "ort" - else list(map(lambda kv: (kv[0].to(device), kv[1].to(device)), past_kv)) + flatten_past_kv_inputs(past_kv) if engine == "ort" else [(kv[0].to(device), kv[1].to(device)) for kv in past_kv] ) if not return_dict: @@ -143,9 +141,7 @@ def get_merged_sample_with_past_kv_inputs( attention_mask = attention_mask.numpy() if engine == "ort" else attention_mask.to(device) position_ids = position_ids.numpy() if engine == "ort" else position_ids.to(device) past_kv = ( - flatten_past_kv_inputs(past_kv) - if engine == "ort" - else list(map(lambda kv: (kv[0].to(device), kv[1].to(device)), past_kv)) + flatten_past_kv_inputs(past_kv) if engine == "ort" else [(kv[0].to(device), kv[1].to(device)) for kv in past_kv] ) if not return_dict: @@ -289,7 +285,7 @@ def enable_past_present_share_buffer(ort_inputs: dict, past_seq_len: int, max_se # Verify ONNX Runtime inputs with model def verify_ort_inputs(model: InferenceSession, ort_inputs: dict): # Check that all model inputs will be provided - model_inputs = set(map(lambda model_input: model_input.name, model.get_inputs())) + model_inputs = {model_input.name for model_input in model.get_inputs()} user_inputs = set(ort_inputs.keys()) missing_inputs = model_inputs - user_inputs if len(missing_inputs): @@ -317,7 +313,7 @@ def add_io_bindings_as_ortvalues( ): io_binding = model.io_binding() - model_inputs = set(map(lambda i: i.name, model.get_inputs())) + model_inputs = {i.name for i in model.get_inputs()} for k, v in ort_inputs.items(): # Use this check to handle scenarios such as INT4 CUDA and FP16 CUDA models with # GQA + RotaryEmbedding fusion where `position_ids` is removed as an ONNX model input diff --git a/onnxruntime/python/tools/transformers/models/longformer/benchmark_longformer.py b/onnxruntime/python/tools/transformers/models/longformer/benchmark_longformer.py index ab92a12343732..c7e0e31765a4f 100644 --- a/onnxruntime/python/tools/transformers/models/longformer/benchmark_longformer.py +++ 
b/onnxruntime/python/tools/transformers/models/longformer/benchmark_longformer.py @@ -41,7 +41,7 @@ import traceback from concurrent.futures import ProcessPoolExecutor from datetime import datetime -from typing import Any, Dict, List +from typing import Any import benchmark_helper import numpy as np @@ -63,7 +63,7 @@ def test_torch_latency( global_lengths, test_times, num_threads, -) -> List[Dict[str, Any]]: +) -> list[dict[str, Any]]: if num_threads > 0: torch.set_num_threads(num_threads) @@ -143,14 +143,14 @@ def test_ort_latency( use_compact_memory=False, use_half4=False, disable_parity=False, -) -> List[Dict[str, Any]]: +) -> list[dict[str, Any]]: results = [] for batch_size in batch_sizes: for sequence_length in sequence_lengths: for global_length in global_lengths: - assert ( - global_length <= model.config.attention_window[0] - ), "Limitation of current implementation: number of global token <= attention_window" + assert global_length <= model.config.attention_window[0], ( + "Limitation of current implementation: number of global token <= attention_window" + ) logger.info( f"Testing batch_size={batch_size} sequence_length={sequence_length} global_length={global_length} " @@ -250,7 +250,7 @@ def test_ort_memory( global_length, test_times, num_threads, -) -> Dict[str, Any]: +) -> dict[str, Any]: logger.info( f"Testing memory for model={onnx_model_path}, batch_size={batch_size}, sequence_length={sequence_length}, " f"global_length={global_length}, test_times={test_times}, num_threads={num_threads}" @@ -307,7 +307,7 @@ def find_onnx_model(model_name, onnx_dir="."): return onnx_model_path -def test_memory(args, device) -> Dict[str, Any]: +def test_memory(args, device) -> dict[str, Any]: if len(args.batch_sizes) > 1: raise RuntimeError("For memory test, only one batch_size (-b) is allowed.") if len(args.sequence_lengths) > 1: @@ -330,7 +330,7 @@ def test_memory(args, device) -> Dict[str, Any]: ) -def test_ort(args, device) -> List[Dict[str, Any]]: +def test_ort(args, device) -> list[dict[str, Any]]: model_name = args.model onnx_model_path = find_onnx_model(model_name) if not args.onnx else args.onnx @@ -385,7 +385,7 @@ def test_ort(args, device) -> List[Dict[str, Any]]: ) -def test_torch(args, device) -> List[Dict[str, Any]]: +def test_torch(args, device) -> list[dict[str, Any]]: model = load_torch_model(args.model, device) return test_torch_latency( device, @@ -399,7 +399,7 @@ def test_torch(args, device) -> List[Dict[str, Any]]: ) -def test_latency(args, device) -> List[Dict[str, Any]]: +def test_latency(args, device) -> list[dict[str, Any]]: if args.engine == "onnxruntime": return test_ort(args, device) @@ -550,7 +550,7 @@ def output_details(results, csv_filename): print(f"Detail results are saved to csv file: {csv_filename}") -def run(args) -> List[Dict[str, Any]]: +def run(args) -> list[dict[str, Any]]: torch.set_grad_enabled(False) # set random seed manually to get deterministic results @@ -565,7 +565,7 @@ def run(args) -> List[Dict[str, Any]]: return test_latency(args, device) -def launch_test(arguments) -> List[Dict[str, Any]]: +def launch_test(arguments) -> list[dict[str, Any]]: if not torch.cuda.is_available(): raise RuntimeError("Please install PyTorch with Cuda, and use a machine with GPU for testing gpu performance.") diff --git a/onnxruntime/python/tools/transformers/models/longformer/longformer_helper.py b/onnxruntime/python/tools/transformers/models/longformer/longformer_helper.py index 1794bf75b4e6f..08a2ba629fbc3 100644 --- 
a/onnxruntime/python/tools/transformers/models/longformer/longformer_helper.py +++ b/onnxruntime/python/tools/transformers/models/longformer/longformer_helper.py @@ -6,7 +6,6 @@ # This script helps creating dummy inputs for Longformer model. import logging -from typing import Dict, List, Tuple, Union import numpy import torch @@ -23,16 +22,16 @@ class LongformerInputs: def __init__(self, input_ids, attention_mask, global_attention_mask): self.input_ids: torch.LongTensor = input_ids - self.attention_mask: Union[torch.FloatTensor, torch.HalfTensor] = attention_mask - self.global_attention_mask: Union[torch.FloatTensor, torch.HalfTensor] = global_attention_mask + self.attention_mask: torch.FloatTensor | torch.HalfTensor = attention_mask + self.global_attention_mask: torch.FloatTensor | torch.HalfTensor = global_attention_mask - def to_list(self) -> List: + def to_list(self) -> list: return [v for v in [self.input_ids, self.attention_mask, self.global_attention_mask] if v is not None] - def to_tuple(self) -> Tuple: + def to_tuple(self) -> tuple: return tuple(v for v in self.to_list()) - def get_ort_inputs(self) -> Dict: + def get_ort_inputs(self) -> dict: return { "input_ids": numpy.ascontiguousarray(self.input_ids.cpu().numpy()), "attention_mask": numpy.ascontiguousarray(self.attention_mask.cpu().numpy()), @@ -69,7 +68,7 @@ def get_dummy_inputs( return LongformerInputs(input_ids, attention_mask, global_attention_mask) @staticmethod - def get_output_shapes(batch_size: int, sequence_length: int, hidden_size: int) -> Dict[str, List[int]]: + def get_output_shapes(batch_size: int, sequence_length: int, hidden_size: int) -> dict[str, list[int]]: """Returns a dictionary with output name as key, and shape as value.""" return { "last_state": [batch_size, sequence_length, hidden_size], diff --git a/onnxruntime/python/tools/transformers/models/sam2/benchmark_sam2.py b/onnxruntime/python/tools/transformers/models/sam2/benchmark_sam2.py index f75a4527be57d..16d71d5057b02 100644 --- a/onnxruntime/python/tools/transformers/models/sam2/benchmark_sam2.py +++ b/onnxruntime/python/tools/transformers/models/sam2/benchmark_sam2.py @@ -11,8 +11,8 @@ import csv import statistics import time +from collections.abc import Mapping from datetime import datetime -from typing import List, Mapping, Optional import torch from image_decoder import SAM2ImageDecoder @@ -84,7 +84,7 @@ def __init__( def __repr__(self): return f"{vars(self)}" - def shape_dict(self) -> Mapping[str, List[int]]: + def shape_dict(self) -> Mapping[str, list[int]]: if self.component == "image_encoder": return encoder_shape_dict(self.batch_size, self.height, self.width) else: @@ -283,7 +283,7 @@ def run_torch(config: TestConfig): def run_test( args: argparse.Namespace, - csv_writer: Optional[csv.DictWriter] = None, + csv_writer: csv.DictWriter | None = None, ): use_gpu: bool = args.use_gpu enable_cuda_graph: bool = args.use_cuda_graph diff --git a/onnxruntime/python/tools/transformers/models/sam2/image_decoder.py b/onnxruntime/python/tools/transformers/models/sam2/image_decoder.py index 5eafb29713126..07ed150631f50 100644 --- a/onnxruntime/python/tools/transformers/models/sam2/image_decoder.py +++ b/onnxruntime/python/tools/transformers/models/sam2/image_decoder.py @@ -212,7 +212,6 @@ def test_decoder_onnx( onnx_model_path: str, multimask_output=False, ): - batch_size = 1 image = random_sam2_input_image(batch_size) sam2_encoder = SAM2ImageEncoder(sam2_model).cpu() diff --git a/onnxruntime/python/tools/transformers/models/sam2/image_encoder.py 
b/onnxruntime/python/tools/transformers/models/sam2/image_encoder.py index b9f30d0371dbe..c5ce339732063 100644 --- a/onnxruntime/python/tools/transformers/models/sam2/image_encoder.py +++ b/onnxruntime/python/tools/transformers/models/sam2/image_encoder.py @@ -75,7 +75,7 @@ def forward( feats = [ feat.permute(1, 2, 0).reshape(1, -1, *feat_size) - for feat, feat_size in zip(vision_feats[::-1], feat_sizes[::-1]) + for feat, feat_size in zip(vision_feats[::-1], feat_sizes[::-1], strict=False) ][::-1] if nvtx_helper is not None: diff --git a/onnxruntime/python/tools/transformers/models/sam2/sam2_demo.py b/onnxruntime/python/tools/transformers/models/sam2/sam2_demo.py index 9533e2652f8a5..7f43724a6343f 100644 --- a/onnxruntime/python/tools/transformers/models/sam2/sam2_demo.py +++ b/onnxruntime/python/tools/transformers/models/sam2/sam2_demo.py @@ -3,7 +3,6 @@ # Licensed under the MIT License. # -------------------------------------------------------------------------- import os -from typing import Union import matplotlib.image as mpimg import matplotlib.pyplot as plt @@ -64,7 +63,7 @@ def show_masks( output_image_file_prefix=None, image_files=None, ): - for i, (mask, score) in enumerate(zip(masks, scores)): + for i, (mask, score) in enumerate(zip(masks, scores, strict=False)): plt.figure(figsize=(10, 10)) plt.imshow(image) show_mask(mask, plt.gca(), borders=borders) @@ -76,7 +75,7 @@ def show_masks( show_box(box_coords, plt.gca()) if len(scores) > 1: - plt.title(f"Mask {i+1}, Score: {score:.3f}", fontsize=18) + plt.title(f"Mask {i + 1}, Score: {score:.3f}", fontsize=18) plt.axis("off") if output_image_file_prefix: @@ -92,7 +91,7 @@ def show_masks( def get_predictor( sam2_dir: str, - device: Union[str, torch.device], + device: str | torch.device, dtype: torch.dtype, model_type="sam2_hiera_large", engine="torch", @@ -303,7 +302,7 @@ def run_demo( def show_all_images(left_images, right_images, suffix=""): # Show images in two rows since display screen is horizontal in most cases. 
fig, axes = plt.subplots(nrows=2, ncols=len(left_images), figsize=(19.20, 10.80)) - for i, (left_img_path, right_img_path) in enumerate(zip(left_images, right_images)): + for i, (left_img_path, right_img_path) in enumerate(zip(left_images, right_images, strict=False)): left_img = mpimg.imread(left_img_path) right_img = mpimg.imread(right_img_path) diff --git a/onnxruntime/python/tools/transformers/models/sam2/sam2_image_onnx_predictor.py b/onnxruntime/python/tools/transformers/models/sam2/sam2_image_onnx_predictor.py index 363b5daf461a4..2f34bfa9aa09a 100644 --- a/onnxruntime/python/tools/transformers/models/sam2/sam2_image_onnx_predictor.py +++ b/onnxruntime/python/tools/transformers/models/sam2/sam2_image_onnx_predictor.py @@ -4,7 +4,6 @@ # -------------------------------------------------------------------------- import logging -from typing import Optional, Tuple, Union import numpy as np import torch @@ -41,7 +40,7 @@ def create_session( onnx_path: str, session_options=None, provider="CUDAExecutionProvider", - device: Union[str, torch.device] = "cuda", + device: str | torch.device = "cuda", enable_cuda_graph=False, ) -> CudaSession: ort_session = create_ort_session( @@ -59,7 +58,7 @@ def __init__( image_decoder_onnx_path: str = "", image_decoder_multi_onnx_path: str = "", provider: str = "CUDAExecutionProvider", - device: Union[str, torch.device] = "cuda", + device: str | torch.device = "cuda", onnx_dtype: torch.dtype = torch.float32, mask_threshold=0.0, max_hole_area=0.0, @@ -114,7 +113,7 @@ def __init__( ) @torch.no_grad() - def set_image(self, image: Union[np.ndarray, Image]): + def set_image(self, image: np.ndarray | Image): """ Calculates the image embeddings for the provided image. @@ -136,9 +135,9 @@ def set_image(self, image: Union[np.ndarray, Image]): input_image = self._transforms(image) input_image = input_image[None, ...].to(self.device) - assert ( - len(input_image.shape) == 4 and input_image.shape[1] == 3 - ), f"input_image must be of size 1x3xHxW, got {input_image.shape}" + assert len(input_image.shape) == 4 and input_image.shape[1] == 3, ( + f"input_image must be of size 1x3xHxW, got {input_image.shape}" + ) # Computing image embeddings for the provided image io_shapes = encoder_shape_dict(batch_size=1, height=input_image.shape[2], width=input_image.shape[3]) @@ -162,14 +161,14 @@ def set_image(self, image: Union[np.ndarray, Image]): @torch.no_grad() def _predict( self, - point_coords: Optional[torch.Tensor], - point_labels: Optional[torch.Tensor], - boxes: Optional[torch.Tensor] = None, - mask_input: Optional[torch.Tensor] = None, + point_coords: torch.Tensor | None, + point_labels: torch.Tensor | None, + boxes: torch.Tensor | None = None, + mask_input: torch.Tensor | None = None, multimask_output: bool = True, return_logits: bool = False, img_idx: int = -1, - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: + ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]: """ Predict masks for the given input prompts, using the currently set image. 
Input prompts are batched torch tensors and are expected to already be
diff --git a/onnxruntime/python/tools/transformers/models/sam2/sam2_utils.py b/onnxruntime/python/tools/transformers/models/sam2/sam2_utils.py
index 4ec4ccc274291..d983cefaaaeec 100644
--- a/onnxruntime/python/tools/transformers/models/sam2/sam2_utils.py
+++ b/onnxruntime/python/tools/transformers/models/sam2/sam2_utils.py
@@ -5,7 +5,7 @@
 import logging
 import os
 import sys
-from typing import List, Mapping, Union
+from collections.abc import Mapping

 import torch
 from sam2.build_sam import build_sam2
@@ -27,7 +27,7 @@ def _get_model_cfg(model_type) -> str:
     return model_cfg


-def load_sam2_model(sam2_dir, model_type, device: Union[str, torch.device] = "cpu") -> SAM2Base:
+def load_sam2_model(sam2_dir, model_type, device: str | torch.device = "cpu") -> SAM2Base:
     checkpoints_dir = os.path.join(sam2_dir, "checkpoints")
     sam2_config_dir = os.path.join(sam2_dir, "sam2_configs")
     if not os.path.exists(sam2_dir):
@@ -65,7 +65,7 @@ def sam2_onnx_path(output_dir, model_type, component, multimask_output=False, su
     )


-def encoder_shape_dict(batch_size: int, height: int, width: int) -> Mapping[str, List[int]]:
+def encoder_shape_dict(batch_size: int, height: int, width: int) -> Mapping[str, list[int]]:
     assert height == 1024 and width == 1024, "Only 1024x1024 images are supported."
     return {
         "image": [batch_size, 3, height, width],
diff --git a/onnxruntime/python/tools/transformers/models/stable_diffusion/README.md b/onnxruntime/python/tools/transformers/models/stable_diffusion/README.md
index edef0d3ee5453..dc83f4dc220f0 100644
--- a/onnxruntime/python/tools/transformers/models/stable_diffusion/README.md
+++ b/onnxruntime/python/tools/transformers/models/stable_diffusion/README.md
@@ -203,35 +203,60 @@ This step will export stable diffusion 1.5 to ONNX model in float32 using script
 ```
 curl https://raw.githubusercontent.com/huggingface/diffusers/v0.15.1/scripts/convert_stable_diffusion_checkpoint_to_onnx.py > convert_sd_onnx.py
-python convert_sd_onnx.py --model_path runwayml/stable-diffusion-v1-5  --output_path ./sd_v1_5/fp32
+python convert_sd_onnx.py --model_path runwayml/stable-diffusion-v1-5  --output_path ./sd1.5_onnx/fp32
 ```

 For SDXL, use optimum to export the model:
 ```
 pip install optimum diffusers onnx onnxruntime-gpu
-optimum-cli export onnx --model stabilityai/stable-diffusion-xl-base-1.0 --task stable-diffusion-xl ./sd_xl_base_onnx
+optimum-cli export onnx --model stabilityai/stable-diffusion-xl-base-1.0 --task stable-diffusion-xl ./sdxl_onnx/fp32
+```
+
+#### Stable Diffusion 3.x and Flux 1.0
+
+Stable Diffusion 3.x and Flux 1.0 require transformers >= 4.45 and optimum > 1.23.3.
+The default opset version for T5 is 12, which does not support bfloat16. To support bfloat16, set the opset version explicitly, as in the example below.
+
+```
+git clone https://github.com/huggingface/optimum
+cd optimum
+pip install -e .
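# Optional: verify that the installed version satisfies optimum > 1.23.3
pip show optimum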
+
+optimum-cli export onnx --model stabilityai/stable-diffusion-3-medium-diffusers ./sd3_onnx/fp32 --opset 15
+optimum-cli export onnx --model stabilityai/stable-diffusion-3.5-medium ./sd3.5_medium_onnx/fp32 --opset 15
+optimum-cli export onnx --model stabilityai/stable-diffusion-3.5-large ./sd3.5_large_onnx/fp32 --opset 15
+optimum-cli export onnx --model black-forest-labs/FLUX.1-schnell ./flux1_schnell_onnx/fp32 --opset 15
+optimum-cli export onnx --model black-forest-labs/FLUX.1-dev ./flux1_dev_onnx/fp32 --opset 15
 ```

 ### Optimize ONNX Pipeline

-Example to optimize the exported float32 ONNX models, and save to float16 models:
+Example to optimize the exported float32 ONNX models and save them as float16 models:
 ```
-python -m onnxruntime.transformers.models.stable_diffusion.optimize_pipeline -i ./sd_v1_5/fp32 -o ./sd_v1_5/fp16 --float16
+python -m onnxruntime.transformers.models.stable_diffusion.optimize_pipeline -i ./sd1.5_onnx/fp32 -o ./sd1.5_onnx/fp16 --float16
 ```

-In all examples below, we run the scripts in source code directory. You can get source code like the following:
+You can also run the script from the source code directory, as follows:
 ```
 git clone https://github.com/microsoft/onnxruntime
 cd onnxruntime/onnxruntime/python/tools/transformers/models/stable_diffusion
+
+python optimize_pipeline.py -i ./sdxl_onnx/fp32 -o ./sdxl_onnx/fp16 --float16
+python optimize_pipeline.py -i ./sd3_onnx/fp32 -o ./sd3_onnx/fp16 --float16
+python optimize_pipeline.py -i ./sd3.5_medium_onnx/fp32 -o ./sd3.5_medium_onnx/fp16 --float16
+python optimize_pipeline.py -i ./sd3.5_large_onnx/fp32 -o ./sd3.5_large_onnx/fp16 --float16
+python optimize_pipeline.py -i ./flux1_schnell_onnx/fp32 -o ./flux1_schnell_onnx/fp16 --float16 --bfloat16
+python optimize_pipeline.py -i ./flux1_dev_onnx/fp32 -o ./flux1_dev_onnx/fp16 --float16 --bfloat16
 ```
+When converting a model to float16, some nodes have overflow risk, and we can force those nodes to run in either float32 or bfloat16.
+Option `--bfloat16` enables the latter. If an operator does not support bfloat16, it will fall back to float32.

 For SDXL model, it is recommended to use a machine with 48 GB or more memory to optimize.
-```
-python optimize_pipeline.py -i ./sd_xl_base_onnx -o ./sd_xl_base_fp16 --float16
-```

 ### Run Benchmark

+#### Run Benchmark with Optimum
+
 The benchmark.py script will run a warm-up prompt twice, and measure the peak GPU memory usage in these two runs, then record them as first_run_memory_MB and second_run_memory_MB.
 Then it will run 5 runs to get average latency (in seconds), and output the results to benchmark_result.csv.

 Note that the first run might need more time and memory: For example, cuDNN convolution algorithm search or model compile happens in the first run.
@@ -245,15 +270,15 @@ Before running benchmark on PyTorch, you need to be logged in via `huggingface-c
 Example to benchmark the optimized pipeline of stable diffusion 1.5 with batch size 1 on CUDA EP:
 ```
-python benchmark.py -p ./sd_v1_5/fp16 -b 1 -v 1.5
+python benchmark.py -p ./sd1.5_onnx/fp16 -b 1 -v 1.5
 python benchmark.py -b 1 -v 1.5
 ```

 For the first command, '-p' specifies a directory of optimized ONNX pipeline as generated by optimize_pipeline.py.
-For the second command without '-p', we will use OnnxruntimeCudaStableDiffusionPipeline to export and optimize ONNX models for clip, unet and vae decoder.
+For the second command without '-p', we will use ORTPipelineForText2Image to export and optimize ONNX models for clip, unet and vae decoder.
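For reference, here is a minimal sketch of driving the same Optimum pipeline class directly (the model directory, prompt, and output filename below are illustrative placeholders):

```
from optimum.onnxruntime import ORTPipelineForText2Image

# Load an optimized pipeline directory produced by optimize_pipeline.py;
# alternatively, pass a model name with export=True to export ONNX models on the fly.
pipeline = ORTPipelineForText2Image.from_pretrained(
    "./sd1.5_onnx/fp16", provider="CUDAExecutionProvider"
)
image = pipeline(prompt="an astronaut riding a horse", num_inference_steps=50).images[0]
image.save("astronaut.png")
```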
On ROCm EP, use the following command instead:
```
-python benchmark.py -p ./sd_v1_5/fp16 -b 1 --tuning --provider rocm -v 1.5
+python benchmark.py -p ./sd1.5_onnx/fp16 -b 1 --tuning --provider rocm -v 1.5
```

 For ROCm EP, you can substitute `python benchmark.py` with `python -m onnxruntime.transformers.models.stable_diffusion.benchmark` since
@@ -263,6 +288,22 @@ For ROCm EP, the `--tuning` is mandatory because we heavily rely on tuning to fi
 The default parameters are stable diffusion version=1.5, height=512, width=512, steps=50, batch_count=5.
 Run `python benchmark.py --help` for more information.

+#### Stable Diffusion 3.x and Flux 1.0
+Example of benchmarking with Optimum using the CUDA provider on Stable Diffusion 3.x and Flux 1.0:
+```
+python benchmark.py -e optimum --height 1024 --width 1024 --steps 30 -b 1 -v 3.0M -p sd3_onnx/fp32
+python benchmark.py -e optimum --height 1024 --width 1024 --steps 30 -b 1 -v 3.5M -p sd3.5_medium_onnx/fp16
+python benchmark.py -e optimum --height 1024 --width 1024 --steps 30 -b 1 -v 3.5L -p sd3.5_large_onnx/fp16
+python benchmark.py -e optimum --height 1024 --width 1024 --steps 4 -b 1 -v Flux.1S -p flux1_schnell_onnx/fp16
+python benchmark.py -e optimum --height 1024 --width 1024 --steps 30 -b 1 -v Flux.1D -p flux1_dev_onnx/fp16
+```
+
+Benchmark PyTorch eager mode performance:
+```
+python benchmark.py -e torch --height 1024 --width 1024 --steps 30 -b 1 -v 3.5L
+python benchmark.py -e torch --height 1024 --width 1024 --steps 30 -b 1 -v Flux.1D
+```
+
 ### Run Benchmark with xFormers

 Run PyTorch 1.13.1+cu117 with xFormers like the following
diff --git a/onnxruntime/python/tools/transformers/models/stable_diffusion/benchmark.py b/onnxruntime/python/tools/transformers/models/stable_diffusion/benchmark.py
index 0708d57f040f8..99a2d9379598d 100755
--- a/onnxruntime/python/tools/transformers/models/stable_diffusion/benchmark.py
+++ b/onnxruntime/python/tools/transformers/models/stable_diffusion/benchmark.py
@@ -9,6 +9,7 @@
 import statistics
 import sys
 import time
+from pathlib import Path

 import __init__  # noqa: F401.
Walk-around to run this script directly import coloredlogs @@ -22,6 +23,11 @@ "2.0": "stabilityai/stable-diffusion-2", "2.1": "stabilityai/stable-diffusion-2-1", "xl-1.0": "stabilityai/stable-diffusion-xl-refiner-1.0", + "3.0M": "stabilityai/stable-diffusion-3-medium-diffusers", + "3.5M": "stabilityai/stable-diffusion-3.5-medium", + "3.5L": "stabilityai/stable-diffusion-3.5-large", + "Flux.1S": "black-forest-labs/FLUX.1-schnell", + "Flux.1D": "black-forest-labs/FLUX.1-dev", } PROVIDERS = { @@ -90,6 +96,24 @@ def get_ort_pipeline(model_name: str, directory: str, provider, disable_safety_c def get_torch_pipeline(model_name: str, disable_safety_checker: bool, enable_torch_compile: bool, use_xformers: bool): + if "FLUX" in model_name: + from diffusers import FluxPipeline + + pipe = FluxPipeline.from_pretrained(model_name, torch_dtype=torch.bfloat16).to("cuda") + if enable_torch_compile: + pipe.transformer.to(memory_format=torch.channels_last) + pipe.transformer = torch.compile(pipe.transformer, mode="max-autotune", fullgraph=True) + return pipe + + if "stable-diffusion-3" in model_name: + from diffusers import StableDiffusion3Pipeline + + pipe = StableDiffusion3Pipeline.from_pretrained(model_name, torch_dtype=torch.bfloat16).to("cuda") + if enable_torch_compile: + pipe.transformer.to(memory_format=torch.channels_last) + pipe.transformer = torch.compile(pipe.transformer, mode="max-autotune", fullgraph=True) + return pipe + from diffusers import DDIMScheduler, StableDiffusionPipeline from torch import channels_last, float16 @@ -116,9 +140,9 @@ def get_torch_pipeline(model_name: str, disable_safety_checker: bool, enable_tor return pipe -def get_image_filename_prefix(engine: str, model_name: str, batch_size: int, disable_safety_checker: bool): +def get_image_filename_prefix(engine: str, model_name: str, batch_size: int, steps: int, disable_safety_checker: bool): short_model_name = model_name.split("/")[-1].replace("stable-diffusion-", "sd") - return f"{engine}_{short_model_name}_b{batch_size}" + ("" if disable_safety_checker else "_safe") + return f"{engine}_{short_model_name}_b{batch_size}_s{steps}" + ("" if disable_safety_checker else "_safe") def run_ort_pipeline( @@ -132,6 +156,7 @@ def run_ort_pipeline( batch_count, start_memory, memory_monitor_type, + skip_warmup: bool = False, ): from diffusers import OnnxStableDiffusionPipeline @@ -140,6 +165,8 @@ def run_ort_pipeline( prompts, negative_prompt = example_prompts() def warmup(): + if skip_warmup: + return prompt, negative = warmup_prompts() pipe( prompt=[prompt] * batch_size, @@ -193,6 +220,25 @@ def warmup(): } +def get_negative_prompt_kwargs(negative_prompt, use_num_images_per_prompt, is_flux, batch_size) -> dict: + # Flux does not support negative prompt + kwargs = ( + ( + {"negative_prompt": negative_prompt} + if use_num_images_per_prompt + else {"negative_prompt": [negative_prompt] * batch_size} + ) + if not is_flux + else {} + ) + + # Fix the random seed so that we can inspect the output quality easily. 
+ if torch.cuda.is_available(): + kwargs["generator"] = torch.Generator(device="cuda").manual_seed(123) + + return kwargs + + def run_torch_pipeline( pipe, batch_size: int, @@ -204,19 +250,20 @@ def run_torch_pipeline( batch_count, start_memory, memory_monitor_type, + skip_warmup=False, ): prompts, negative_prompt = example_prompts() - # total 2 runs of warm up, and measure GPU memory for CUDA EP + import diffusers + + is_flux = isinstance(pipe, diffusers.FluxPipeline) + def warmup(): + if skip_warmup: + return prompt, negative = warmup_prompts() - pipe( - prompt=[prompt] * batch_size, - height=height, - width=width, - num_inference_steps=steps, - negative_prompt=[negative] * batch_size, - ) + extra_kwargs = get_negative_prompt_kwargs(negative, False, is_flux, batch_size) + pipe(prompt=[prompt] * batch_size, height=height, width=width, num_inference_steps=steps, **extra_kwargs) # Run warm up, and measure GPU memory of two runs (The first run has cuDNN algo search so it might need more memory) first_run_memory = measure_gpu_memory(memory_monitor_type, warmup, start_memory) @@ -232,13 +279,13 @@ def warmup(): break torch.cuda.synchronize() inference_start = time.time() + extra_kwargs = get_negative_prompt_kwargs(negative_prompt, False, is_flux, batch_size) images = pipe( prompt=[prompt] * batch_size, height=height, width=width, num_inference_steps=steps, - negative_prompt=[negative_prompt] * batch_size, - generator=None, # torch.Generator + **extra_kwargs, ).images torch.cuda.synchronize() @@ -279,6 +326,7 @@ def run_ort( start_memory, memory_monitor_type, tuning: bool, + skip_warmup: bool = False, ): provider_and_options = provider if tuning and provider in ["CUDAExecutionProvider", "ROCMExecutionProvider"]: @@ -289,7 +337,7 @@ def run_ort( load_end = time.time() print(f"Model loading took {load_end - load_start} seconds") - image_filename_prefix = get_image_filename_prefix("ort", model_name, batch_size, disable_safety_checker) + image_filename_prefix = get_image_filename_prefix("ort", model_name, batch_size, steps, disable_safety_checker) result = run_ort_pipeline( pipe, batch_size, @@ -301,6 +349,7 @@ def run_ort( batch_count, start_memory, memory_monitor_type, + skip_warmup=skip_warmup, ) result.update( @@ -322,33 +371,12 @@ def get_optimum_ort_pipeline( disable_safety_checker: bool = True, use_io_binding: bool = False, ): - from optimum.onnxruntime import ORTStableDiffusionPipeline, ORTStableDiffusionXLPipeline + from optimum.onnxruntime import ORTPipelineForText2Image if directory is not None and os.path.exists(directory): - if "xl" in model_name: - pipeline = ORTStableDiffusionXLPipeline.from_pretrained( - directory, - provider=provider, - session_options=None, - use_io_binding=False, # Not supported by Optimum version 1.17.1 at the time of verification. - ) - else: - pipeline = ORTStableDiffusionPipeline.from_pretrained( - directory, - provider=provider, - use_io_binding=use_io_binding, - ) - elif "xl" in model_name: - pipeline = ORTStableDiffusionXLPipeline.from_pretrained( - model_name, - export=True, - provider=provider, - session_options=None, - use_io_binding=False, # Not supported by Optimum version 1.17.1 at the time of verification. 
- ) - pipeline.save_pretrained(directory) + pipeline = ORTPipelineForText2Image.from_pretrained(directory, provider=provider, use_io_binding=use_io_binding) else: - pipeline = ORTStableDiffusionPipeline.from_pretrained( + pipeline = ORTPipelineForText2Image.from_pretrained( model_name, export=True, provider=provider, @@ -375,32 +403,31 @@ def run_optimum_ort_pipeline( start_memory, memory_monitor_type, use_num_images_per_prompt=False, + skip_warmup=False, ): - from optimum.onnxruntime import ORTStableDiffusionPipeline, ORTStableDiffusionXLPipeline + print("Pipeline type", type(pipe)) + from optimum.onnxruntime.modeling_diffusion import ORTFluxPipeline - assert isinstance(pipe, (ORTStableDiffusionPipeline, ORTStableDiffusionXLPipeline)) + is_flux = isinstance(pipe, ORTFluxPipeline) prompts, negative_prompt = example_prompts() def warmup(): + if skip_warmup: + return prompt, negative = warmup_prompts() + extra_kwargs = get_negative_prompt_kwargs(negative, use_num_images_per_prompt, is_flux, batch_size) if use_num_images_per_prompt: pipe( prompt=prompt, height=height, width=width, num_inference_steps=steps, - negative_prompt=negative, num_images_per_prompt=batch_count, + **extra_kwargs, ) else: - pipe( - prompt=[prompt] * batch_size, - height=height, - width=width, - num_inference_steps=steps, - negative_prompt=[negative] * batch_size, - ) + pipe(prompt=[prompt] * batch_size, height=height, width=width, num_inference_steps=steps, **extra_kwargs) # Run warm up, and measure GPU memory of two runs. # The first run has algo search for cuDNN/MIOpen, so it might need more memory. @@ -409,6 +436,8 @@ def warmup(): warmup() + extra_kwargs = get_negative_prompt_kwargs(negative_prompt, use_num_images_per_prompt, is_flux, batch_size) + latency_list = [] for i, prompt in enumerate(prompts): if i >= num_prompts: @@ -420,16 +449,12 @@ def warmup(): height=height, width=width, num_inference_steps=steps, - negative_prompt=negative_prompt, num_images_per_prompt=batch_size, + **extra_kwargs, ).images else: images = pipe( - prompt=[prompt] * batch_size, - height=height, - width=width, - num_inference_steps=steps, - negative_prompt=[negative_prompt] * batch_size, + prompt=[prompt] * batch_size, height=height, width=width, num_inference_steps=steps, **extra_kwargs ).images inference_end = time.time() latency = inference_end - inference_start @@ -470,6 +495,7 @@ def run_optimum_ort( start_memory, memory_monitor_type, use_io_binding: bool = False, + skip_warmup: bool = False, ): load_start = time.time() pipe = get_optimum_ort_pipeline( @@ -478,7 +504,10 @@ def run_optimum_ort( load_end = time.time() print(f"Model loading took {load_end - load_start} seconds") - image_filename_prefix = get_image_filename_prefix("optimum", model_name, batch_size, disable_safety_checker) + full_model_name = model_name + "_" + Path(directory).name if directory else model_name + image_filename_prefix = get_image_filename_prefix( + "optimum", full_model_name, batch_size, steps, disable_safety_checker + ) result = run_optimum_ort_pipeline( pipe, batch_size, @@ -490,6 +519,7 @@ def run_optimum_ort( batch_count, start_memory, memory_monitor_type, + skip_warmup=skip_warmup, ) result.update( @@ -583,7 +613,7 @@ def warmup(): warmup() - image_filename_prefix = get_image_filename_prefix("ort_trt", short_name, batch_size, disable_safety_checker) + image_filename_prefix = get_image_filename_prefix("ort_trt", short_name, batch_size, steps, disable_safety_checker) latency_list = [] prompts, negative_prompt = example_prompts() @@ -651,6 +681,7 @@ 
def run_tensorrt_static( max_batch_size: int, nvtx_profile: bool = False, use_cuda_graph: bool = True, + skip_warmup: bool = False, ): print("[I] Initializing TensorRT accelerated StableDiffusionXL txt2img pipeline (static input shape)") @@ -712,6 +743,8 @@ def run_tensorrt_static( pipeline.load_resources(height, width, batch_size) def warmup(): + if skip_warmup: + return prompt, negative = warmup_prompts() pipeline.run([prompt] * batch_size, [negative] * batch_size, height, width, denoising_steps=steps) @@ -722,7 +755,7 @@ def warmup(): warmup() - image_filename_prefix = get_image_filename_prefix("trt", model_name, batch_size, disable_safety_checker) + image_filename_prefix = get_image_filename_prefix("trt", model_name, batch_size, steps, disable_safety_checker) latency_list = [] prompts, negative_prompt = example_prompts() @@ -783,6 +816,7 @@ def run_tensorrt_static_xl( max_batch_size: int, nvtx_profile: bool = False, use_cuda_graph=True, + skip_warmup: bool = False, ): print("[I] Initializing TensorRT accelerated StableDiffusionXL txt2img pipeline (static input shape)") @@ -866,6 +900,8 @@ def run_sd_xl_inference(prompt, negative_prompt, seed=None): ) def warmup(): + if skip_warmup: + return prompt, negative = warmup_prompts() run_sd_xl_inference([prompt] * batch_size, [negative] * batch_size) @@ -877,7 +913,7 @@ def warmup(): warmup() model_name = pipeline_info.name() - image_filename_prefix = get_image_filename_prefix("trt", model_name, batch_size, disable_safety_checker) + image_filename_prefix = get_image_filename_prefix("trt", model_name, batch_size, steps, disable_safety_checker) latency_list = [] prompts, negative_prompt = example_prompts() @@ -930,6 +966,7 @@ def run_ort_trt_xl( max_batch_size: int, nvtx_profile: bool = False, use_cuda_graph=True, + skip_warmup: bool = False, ): from demo_utils import initialize_pipeline from engine_builder import EngineType @@ -961,6 +998,8 @@ def run_sd_xl_inference(prompt, negative_prompt, seed=None): ) def warmup(): + if skip_warmup: + return prompt, negative = warmup_prompts() run_sd_xl_inference([prompt] * batch_size, [negative] * batch_size) @@ -972,7 +1011,7 @@ def warmup(): warmup() model_name = pipeline.pipeline_info.name() - image_filename_prefix = get_image_filename_prefix("ort_trt", model_name, batch_size, disable_safety_checker) + image_filename_prefix = get_image_filename_prefix("ort_trt", model_name, batch_size, steps, disable_safety_checker) latency_list = [] prompts, negative_prompt = example_prompts() @@ -1029,6 +1068,7 @@ def run_torch( batch_count: int, start_memory, memory_monitor_type, + skip_warmup: bool = True, ): torch.backends.cudnn.enabled = True torch.backends.cudnn.benchmark = True @@ -1040,7 +1080,7 @@ def run_torch( load_end = time.time() print(f"Model loading took {load_end - load_start} seconds") - image_filename_prefix = get_image_filename_prefix("torch", model_name, batch_size, disable_safety_checker) + image_filename_prefix = get_image_filename_prefix("torch", model_name, batch_size, steps, disable_safety_checker) if not enable_torch_compile: with torch.inference_mode(): @@ -1055,6 +1095,7 @@ def run_torch( batch_count, start_memory, memory_monitor_type, + skip_warmup=skip_warmup, ) else: result = run_torch_pipeline( @@ -1068,6 +1109,7 @@ def run_torch( batch_count, start_memory, memory_monitor_type, + skip_warmup=skip_warmup, ) result.update( @@ -1173,6 +1215,14 @@ def parse_arguments(): ) parser.set_defaults(use_io_binding=False) + parser.add_argument( + "--skip_warmup", + required=False, + 
action="store_true", + help="No warmup.", + ) + parser.set_defaults(skip_warmup=False) + parser.add_argument( "-b", "--batch_size", @@ -1312,6 +1362,7 @@ def main(): max_batch_size=args.max_trt_batch_size, nvtx_profile=False, use_cuda_graph=args.enable_cuda_graph, + skip_warmup=args.skip_warmup, ) else: print("Testing Txt2ImgPipeline with static input shape. Backend is ORT TensorRT EP.") @@ -1330,6 +1381,7 @@ def main(): max_batch_size=args.max_trt_batch_size, nvtx_profile=False, use_cuda_graph=args.enable_cuda_graph, + skip_warmup=args.skip_warmup, ) elif args.engine == "optimum" and provider == "CUDAExecutionProvider": if "xl" in args.version: @@ -1349,11 +1401,12 @@ def main(): start_memory=start_memory, memory_monitor_type=memory_monitor_type, use_io_binding=args.use_io_binding, + skip_warmup=args.skip_warmup, ) elif args.engine == "onnxruntime": - assert args.pipeline and os.path.isdir( - args.pipeline - ), "--pipeline should be specified for the directory of ONNX models" + assert args.pipeline and os.path.isdir(args.pipeline), ( + "--pipeline should be specified for the directory of ONNX models" + ) print(f"Testing diffusers StableDiffusionPipeline with {provider} provider and tuning={args.tuning}") result = run_ort( model_name=sd_model, @@ -1369,6 +1422,7 @@ def main(): start_memory=start_memory, memory_monitor_type=memory_monitor_type, tuning=args.tuning, + skip_warmup=args.skip_warmup, ) elif args.engine == "tensorrt" and "xl" in args.version: print("Testing Txt2ImgXLPipeline with static input shape. Backend is TensorRT.") @@ -1387,6 +1441,7 @@ def main(): max_batch_size=args.max_trt_batch_size, nvtx_profile=False, use_cuda_graph=args.enable_cuda_graph, + skip_warmup=args.skip_warmup, ) elif args.engine == "tensorrt": print("Testing Txt2ImgPipeline with static input shape. Backend is TensorRT.") @@ -1406,6 +1461,7 @@ def main(): max_batch_size=args.max_trt_batch_size, nvtx_profile=False, use_cuda_graph=args.enable_cuda_graph, + skip_warmup=args.skip_warmup, ) else: print( @@ -1424,6 +1480,7 @@ def main(): batch_count=args.batch_count, start_memory=start_memory, memory_monitor_type=memory_monitor_type, + skip_warmup=args.skip_warmup, ) print(result) diff --git a/onnxruntime/python/tools/transformers/models/stable_diffusion/benchmark_flux.sh b/onnxruntime/python/tools/transformers/models/stable_diffusion/benchmark_flux.sh new file mode 100644 index 0000000000000..2c7785eb8f62f --- /dev/null +++ b/onnxruntime/python/tools/transformers/models/stable_diffusion/benchmark_flux.sh @@ -0,0 +1,126 @@ +#!/bin/bash +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. 
+# -------------------------------------------------------------------------
+
+set -euo pipefail
+
+# Script to benchmark Flux models with ONNX and PyTorch
+# Usage: bash benchmark_flux.sh
+
+# Validate inputs and environment
+command -v python3 &>/dev/null || { echo "Python3 is required but not installed."; exit 1; }
+command -v wget &>/dev/null || { echo "wget is required but not installed."; exit 1; }
+
+# Input arguments with defaults
+install_dir="${1:-$HOME}"
+onnx_dir="${2:-onnx_models}"
+
+# GPU settings
+export CUDA_VISIBLE_DEVICES=0
+
+# Function to log messages
+log() {
+    echo -e "\033[1;32m[INFO]\033[0m $1"
+}
+
+# Function to install CUDA 12.6
+install_cuda_12() {
+    log "Installing CUDA 12.6"
+    pushd "$install_dir"
+    wget -q https://developer.download.nvidia.com/compute/cuda/12.6.2/local_installers/cuda_12.6.2_560.35.03_linux.run
+    sh cuda_12.6.2_560.35.03_linux.run --toolkit --toolkitpath="$install_dir/cuda12.6" --silent --override --no-man-page
+    export PATH="$install_dir/cuda12.6/bin:$PATH"
+    export LD_LIBRARY_PATH="$install_dir/cuda12.6/lib64:$LD_LIBRARY_PATH"
+    popd
+}
+
+# Function to install cuDNN 9.6
+install_cudnn_9() {
+    log "Installing cuDNN 9.6"
+    pushd "$install_dir"
+    wget -q https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-x86_64/cudnn-linux-x86_64-9.6.0.74_cuda12-archive.tar.xz
+    mkdir -p "$install_dir/cudnn9.6"
+    tar -Jxvf cudnn-linux-x86_64-9.6.0.74_cuda12-archive.tar.xz -C "$install_dir/cudnn9.6" --strip=1
+    export LD_LIBRARY_PATH="$install_dir/cudnn9.6/lib:$LD_LIBRARY_PATH"
+    popd
+}
+
+# Function to install optimum
+install_optimum() {
+    log "Installing Optimum"
+    optimum_dir="$install_dir/optimum"
+    if [ ! -d "$optimum_dir" ]; then
+        git clone https://github.com/huggingface/optimum "$optimum_dir"
+    fi
+    pushd "$optimum_dir"
+    pip show optimum &>/dev/null || pip install -e .
+    popd
+}
+
+# Function to build and install ONNX Runtime
+install_onnxruntime() {
+    log "Building ONNX Runtime"
+    pushd "$install_dir"
+    if [ ! -d onnxruntime ]; then
+        git clone https://github.com/microsoft/onnxruntime
+    fi
+    pushd onnxruntime
+    pip install --upgrade pip cmake psutil setuptools wheel packaging ninja numpy==2.2
+    sh build.sh --config Release --build_dir build/cuda12 --parallel \
+        --use_cuda --cuda_version 12.6 --cuda_home "$install_dir/cuda12.6" \
+        --cudnn_home "$install_dir/cudnn9.6" \
+        --build_wheel --skip_tests \
+        --cmake_generator Ninja \
+        --compile_no_warning_as_error \
+        --cmake_extra_defines onnxruntime_BUILD_UNIT_TESTS=OFF CMAKE_CUDA_ARCHITECTURES=native
+
+    log "Installing ONNX Runtime"
+    pip install build/cuda12/Release/dist/onnxruntime_gpu-*-linux_x86_64.whl
+    popd
+    popd
+}
+
+# Function to install GPU dependencies
+install_gpu() {
+    log "Installing GPU dependencies"
+    # Use explicit if-blocks: under `set -e`, a false `[ ! -d ... ] && cmd` test
+    # would abort the whole script when the directory already exists.
+    if [ ! -d "$install_dir/cuda12.6" ]; then install_cuda_12; fi
+    if [ ! -d "$install_dir/cudnn9.6" ]; then install_cudnn_9; fi
+    pip install torch torchvision --index-url https://download.pytorch.org/whl/cu124
+    pip install diffusers==0.32.0 transformers==4.46.3 onnx==1.17.0 protobuf==5.29.2 py3nvml
+    install_onnxruntime
+    install_optimum
+}
+
+# Function to run benchmarks
+run_benchmark() {
+    local model=$1
+    local dir=$2
+    local version=$3
+    local steps=$4
+    local batch=$5
+
+    log "Running benchmark for model: $model"
+    mkdir -p "$dir"
+    if [ ! -d "$dir/fp32" ]; then optimum-cli export onnx --model "$model" "$dir/fp32" --opset 15 --task text-to-image; fi
+    if [ ! -d "$dir/fp16_fp32" ]; then python optimize_pipeline.py -i "$dir/fp32" -o "$dir/fp16_fp32" --float16; fi
+    if [ ! -d "$dir/fp16_bf16" ]; then python optimize_pipeline.py -i "$dir/fp32" -o "$dir/fp16_bf16" --float16 --bfloat16; fi
+    python benchmark.py -e optimum --height 1024 --width 1024 --steps "$steps" -b "$batch" -v "$version" -p "$dir/fp16_fp32"
+    python benchmark.py -e optimum --height 1024 --width 1024 --steps "$steps" -b "$batch" -v "$version" -p "$dir/fp16_bf16"
+    python benchmark.py -e torch --height 1024 --width 1024 --steps "$steps" -b "$batch" -v "$version"
+    python benchmark.py -e torch --height 1024 --width 1024 --steps "$steps" -b "$batch" -v "$version" --enable_torch_compile
+}
+
+# Main script execution
+install_gpu
+
+log "Creating ONNX model directory: $onnx_dir"
+mkdir -p "$onnx_dir"
+
+run_benchmark black-forest-labs/FLUX.1-schnell "$onnx_dir/flux1_schnell" Flux.1S 4 1 > "$onnx_dir/flux1_schnell_s4_b1.log"
+run_benchmark black-forest-labs/FLUX.1-dev "$onnx_dir/flux1_dev" Flux.1D 50 1 > "$onnx_dir/flux1_dev_s50_b1.log"
+run_benchmark stabilityai/stable-diffusion-3.5-large "$onnx_dir/sd3.5_large" 3.5L 50 1 > "$onnx_dir/sd3.5_large_s50_b1.log"
+run_benchmark stabilityai/stable-diffusion-3.5-medium "$onnx_dir/sd3.5_medium" 3.5M 50 1 > "$onnx_dir/sd3.5_medium_s50_b1.log"
+
+log "Benchmark completed."
diff --git a/onnxruntime/python/tools/transformers/models/stable_diffusion/demo_utils.py b/onnxruntime/python/tools/transformers/models/stable_diffusion/demo_utils.py
index a50940933eb82..30f4663100d8a 100644
--- a/onnxruntime/python/tools/transformers/models/stable_diffusion/demo_utils.py
+++ b/onnxruntime/python/tools/transformers/models/stable_diffusion/demo_utils.py
@@ -23,7 +23,7 @@
 import os
 import sys
 from importlib.metadata import PackageNotFoundError, version
-from typing import Any, Dict, List, Optional
+from typing import Any

 import controlnet_aux
 import cv2
@@ -307,7 +307,7 @@ def max_batch(args):
     return max_batch_size


-def get_metadata(args, is_xl: bool = False) -> Dict[str, Any]:
+def get_metadata(args, is_xl: bool = False) -> dict[str, Any]:
     metadata = {
         "command": " ".join(['"' + x + '"' if " " in x else x for x in sys.argv]),
         "args.prompt": args.prompt,
@@ -410,7 +410,7 @@ def initialize_pipeline(
     lora_scale: float = 1.0,
     use_fp16_vae: bool = True,
     use_vae: bool = True,
-    framework_model_dir: Optional[str] = None,
+    framework_model_dir: str | None = None,
     max_cuda_graphs: int = 1,
 ):
     pipeline_info = PipelineInfo(
@@ -649,7 +649,7 @@ def get_canny_image(image) -> Image.Image:
     return image


-def process_controlnet_images_xl(args) -> List[Image.Image]:
+def process_controlnet_images_xl(args) -> list[Image.Image]:
     """
     Process control image for SDXL control net.
""" diff --git a/onnxruntime/python/tools/transformers/models/stable_diffusion/diffusion_models.py b/onnxruntime/python/tools/transformers/models/stable_diffusion/diffusion_models.py index c2cfc165e32cf..8dcda8a7633ac 100644 --- a/onnxruntime/python/tools/transformers/models/stable_diffusion/diffusion_models.py +++ b/onnxruntime/python/tools/transformers/models/stable_diffusion/diffusion_models.py @@ -24,7 +24,6 @@ import logging import os import tempfile -from typing import Dict, List, Optional import onnx import onnx_graphsurgeon as gs @@ -135,7 +134,7 @@ def is_xl_refiner(self) -> bool: def use_safetensors(self) -> bool: return self.is_xl() or self.version in ["sd-turbo"] - def stages(self) -> List[str]: + def stages(self) -> list[str]: if self.is_xl_base_or_turbo(): return ["clip", "clip2", "unetxl"] + (["vae"] if self._use_vae else []) @@ -150,11 +149,11 @@ def vae_scaling_factor(self) -> float: def vae_torch_fallback(self) -> bool: return self.is_xl() and not self._use_fp16_vae - def custom_fp16_vae(self) -> Optional[str]: + def custom_fp16_vae(self) -> str | None: # For SD XL, use a VAE that fine-tuned to run in fp16 precision without generating NaNs return "madebyollin/sdxl-vae-fp16-fix" if self._use_fp16_vae and self.is_xl() else None - def custom_unet(self) -> Optional[str]: + def custom_unet(self) -> str | None: return "latent-consistency/lcm-sdxl" if self._use_lcm and self.is_xl_base() else None @staticmethod @@ -372,13 +371,13 @@ def from_pretrained(self, model_class, framework_model_dir, subfolder=None, mode def load_model(self, framework_model_dir: str, subfolder: str): pass - def get_input_names(self) -> List[str]: + def get_input_names(self) -> list[str]: pass - def get_output_names(self) -> List[str]: + def get_output_names(self) -> list[str]: pass - def get_dynamic_axes(self) -> Dict[str, Dict[int, str]]: + def get_dynamic_axes(self) -> dict[str, dict[int, str]]: pass def get_sample_input(self, batch_size, image_height, image_width) -> tuple: @@ -418,7 +417,7 @@ def get_input_profile(self, batch_size, image_height, image_width, static_batch, def get_shape_dict(self, batch_size, image_height, image_width): pass - def fp32_input_output_names(self) -> List[str]: + def fp32_input_output_names(self) -> list[str]: """For CUDA EP, we export ONNX model with FP32 first, then convert it to mixed precision model. This is a list of input or output names that are kept as float32 in optimized model. 
""" @@ -720,7 +719,7 @@ def __init__(self, unet, controlnets: ControlNetModel): def forward(self, sample, timestep, encoder_hidden_states, controlnet_images, controlnet_scales): for i, (controlnet_image, conditioning_scale, controlnet) in enumerate( - zip(controlnet_images, controlnet_scales, self.controlnets) + zip(controlnet_images, controlnet_scales, self.controlnets, strict=False) ): down_samples, mid_sample = controlnet( sample, @@ -739,7 +738,7 @@ def forward(self, sample, timestep, encoder_hidden_states, controlnet_images, co else: down_block_res_samples = [ samples_prev + samples_curr - for samples_prev, samples_curr in zip(down_block_res_samples, down_samples) + for samples_prev, samples_curr in zip(down_block_res_samples, down_samples, strict=False) ] mid_block_res_sample += mid_sample @@ -772,7 +771,7 @@ def forward( ): added_cond_kwargs = {"text_embeds": text_embeds, "time_ids": time_ids} for i, (controlnet_image, conditioning_scale, controlnet) in enumerate( - zip(controlnet_images, controlnet_scales, self.controlnets) + zip(controlnet_images, controlnet_scales, self.controlnets, strict=False) ): down_samples, mid_sample = controlnet( sample, @@ -790,7 +789,7 @@ def forward( else: down_block_res_samples = [ samples_prev + samples_curr - for samples_prev, samples_curr in zip(down_block_res_samples, down_samples) + for samples_prev, samples_curr in zip(down_block_res_samples, down_samples, strict=False) ] mid_block_res_sample += mid_sample @@ -1152,7 +1151,7 @@ def __init__( device, max_batch_size, fp16: bool = False, - custom_fp16_vae: Optional[str] = None, + custom_fp16_vae: str | None = None, ): super().__init__( pipeline_info, @@ -1232,7 +1231,7 @@ def get_sample_input(self, batch_size, image_height, image_width): dtype = torch.float16 if self.fp16 else torch.float32 return (torch.randn(batch_size, 4, latent_height, latent_width, dtype=dtype, device=self.device),) - def fp32_input_output_names(self) -> List[str]: + def fp32_input_output_names(self) -> list[str]: return [] diff --git a/onnxruntime/python/tools/transformers/models/stable_diffusion/diffusion_schedulers.py b/onnxruntime/python/tools/transformers/models/stable_diffusion/diffusion_schedulers.py index 57cb51bbea52d..ff23874000019 100644 --- a/onnxruntime/python/tools/transformers/models/stable_diffusion/diffusion_schedulers.py +++ b/onnxruntime/python/tools/transformers/models/stable_diffusion/diffusion_schedulers.py @@ -21,7 +21,6 @@ # limitations under the License. # -------------------------------------------------------------------------- -from typing import List, Optional import numpy as np import torch @@ -156,8 +155,7 @@ def step( model_output = (alpha_prod_t**0.5) * model_output + (beta_prod_t**0.5) * sample else: raise ValueError( - f"prediction_type given as {self.prediction_type} must be one of `epsilon`, `sample`, or" - " `v_prediction`" + f"prediction_type given as {self.prediction_type} must be one of `epsilon`, `sample`, or `v_prediction`" ) # 4. 
Clip "predicted x_0" @@ -392,8 +390,8 @@ def __init__( predict_x0: bool = True, solver_type: str = "bh2", lower_order_final: bool = True, - disable_corrector: Optional[List[int]] = None, - use_karras_sigmas: Optional[bool] = False, + disable_corrector: list[int] | None = None, + use_karras_sigmas: bool | None = False, timestep_spacing: str = "linspace", steps_offset: int = 0, sigma_min=None, @@ -628,7 +626,7 @@ def multistep_uni_p_bh_update( model_output: torch.FloatTensor, *args, sample: torch.FloatTensor = None, - order: Optional[int] = None, + order: int | None = None, **kwargs, ) -> torch.FloatTensor: prev_timestep = args[0] if len(args) > 0 else kwargs.pop("prev_timestep", None) @@ -735,7 +733,7 @@ def multistep_uni_c_bh_update( *args, last_sample: torch.FloatTensor = None, this_sample: torch.FloatTensor = None, - order: Optional[int] = None, + order: int | None = None, **kwargs, ) -> torch.FloatTensor: this_timestep = args[0] if len(args) > 0 else kwargs.pop("this_timestep", None) @@ -1085,7 +1083,7 @@ def step( model_output: torch.FloatTensor, timestep: int, sample: torch.FloatTensor, - generator: Optional[torch.Generator] = None, + generator: torch.Generator | None = None, ): if self.num_inference_steps is None: raise ValueError( diff --git a/onnxruntime/python/tools/transformers/models/stable_diffusion/engine_builder.py b/onnxruntime/python/tools/transformers/models/stable_diffusion/engine_builder.py index 7609ae10fc96d..d36411a1fa84d 100644 --- a/onnxruntime/python/tools/transformers/models/stable_diffusion/engine_builder.py +++ b/onnxruntime/python/tools/transformers/models/stable_diffusion/engine_builder.py @@ -5,7 +5,6 @@ import hashlib import os from enum import Enum -from typing import Optional import torch from diffusion_models import CLIP, VAE, CLIPWithProj, PipelineInfo, UNet, UNetXL @@ -275,7 +274,7 @@ def vae_decode(self, latents): def get_engine_paths( - work_dir: str, pipeline_info: PipelineInfo, engine_type: EngineType, framework_model_dir: Optional[str] = None + work_dir: str, pipeline_info: PipelineInfo, engine_type: EngineType, framework_model_dir: str | None = None ): root_dir = work_dir or "." 
short_name = pipeline_info.short_name() diff --git a/onnxruntime/python/tools/transformers/models/stable_diffusion/engine_builder_ort_cuda.py b/onnxruntime/python/tools/transformers/models/stable_diffusion/engine_builder_ort_cuda.py index 56012e223b18c..040e3a38dbc52 100644 --- a/onnxruntime/python/tools/transformers/models/stable_diffusion/engine_builder_ort_cuda.py +++ b/onnxruntime/python/tools/transformers/models/stable_diffusion/engine_builder_ort_cuda.py @@ -6,7 +6,6 @@ import gc import logging import os -from typing import Dict, List, Optional import onnx import torch @@ -72,7 +71,7 @@ def metadata(self, name: str): data[f"{name}.gpu_graph_id"] = self.current_gpu_binding.last_run_gpu_graph_id return data - def infer(self, feed_dict: Dict[str, torch.Tensor]): + def infer(self, feed_dict: dict[str, torch.Tensor]): return self.current_gpu_binding.infer(feed_dict=feed_dict, disable_cuda_graph_in_run=not self.enable_cuda_graph) def allocate_buffers(self, shape_dict, device): @@ -93,7 +92,7 @@ def __init__( onnx_opset_version: int, use_cuda_graph: bool, fp16: bool = True, - force_fp32_ops: Optional[List[str]] = None, + force_fp32_ops: list[str] | None = None, optimize_by_ort: bool = True, ): self.onnx_opset_version = onnx_opset_version @@ -140,7 +139,7 @@ def _configure( onnx_opset_version: int, use_cuda_graph: bool, fp16: bool = True, - force_fp32_ops: Optional[List[str]] = None, + force_fp32_ops: list[str] | None = None, optimize_by_ort: bool = True, ): self.model_config[model_name] = _ModelConfig( @@ -238,11 +237,11 @@ def build_engines( engine_dir: str, framework_model_dir: str, onnx_dir: str, - tmp_dir: Optional[str] = None, + tmp_dir: str | None = None, onnx_opset_version: int = 17, device_id: int = 0, save_fp32_intermediate_model: bool = False, - import_engine_dir: Optional[str] = None, + import_engine_dir: str | None = None, max_cuda_graphs: int = 1, ): self.torch_device = torch.device("cuda", device_id) diff --git a/onnxruntime/python/tools/transformers/models/stable_diffusion/optimize_pipeline.py b/onnxruntime/python/tools/transformers/models/stable_diffusion/optimize_pipeline.py index ffcfd6d9fd7e0..24897756b2d7a 100644 --- a/onnxruntime/python/tools/transformers/models/stable_diffusion/optimize_pipeline.py +++ b/onnxruntime/python/tools/transformers/models/stable_diffusion/optimize_pipeline.py @@ -21,13 +21,14 @@ import shutil import tempfile from pathlib import Path -from typing import List, Optional import __init__ # noqa: F401. 
Walk-around to run this script directly import coloredlogs import onnx from fusion_options import FusionOptions from onnx_model_clip import ClipOnnxModel +from onnx_model_mmdit import MmditOnnxModel +from onnx_model_t5 import T5OnnxModel from onnx_model_unet import UnetOnnxModel from onnx_model_vae import VaeOnnxModel from optimizer import optimize_by_onnxruntime, optimize_model @@ -46,12 +47,64 @@ def has_external_data(onnx_model_path): return False +def is_sd_3(source_dir: Path): + return (source_dir / "text_encoder_3").exists() + + +def is_sdxl(source_dir: Path): + return ( + (source_dir / "text_encoder_2").exists() + and not (source_dir / "text_encoder_3").exists() + and not (source_dir / "transformer").exists() + ) + + +def is_flux(source_dir: Path): + return ( + (source_dir / "text_encoder_2").exists() + and not (source_dir / "text_encoder_3").exists() + and (source_dir / "transformer").exists() + ) + + +def _classify_pipeline_type(source_dir: Path): + # May also check _class_name in model_index.json like `StableDiffusion3Pipeline` or `FluxPipeline` etc to classify. + if is_sd_3(source_dir): + return "sd3" + + if is_flux(source_dir): + return "flux" + + if is_sdxl(source_dir): + return "sdxl" + + # sd 1.x and 2.x + return "sd" + + +def _get_model_list(pipeline_type: str): + if pipeline_type == "sd3": + return ["text_encoder", "text_encoder_2", "text_encoder_3", "transformer", "vae_encoder", "vae_decoder"] + + if pipeline_type == "flux": + return ["text_encoder", "text_encoder_2", "transformer", "vae_encoder", "vae_decoder"] + + if pipeline_type == "sdxl": + return ["text_encoder", "text_encoder_2", "unet", "vae_encoder", "vae_decoder"] + + assert pipeline_type == "sd" + return ["text_encoder", "unet", "vae_encoder", "vae_decoder"] + + def _optimize_sd_pipeline( source_dir: Path, target_dir: Path, - use_external_data_format: Optional[bool], + pipeline_type: str, + model_list: list[str], + use_external_data_format: bool | None, float16: bool, - force_fp32_ops: List[str], + bfloat16: bool, + force_fp32_ops: list[str], enable_runtime_optimization: bool, args, ): @@ -60,8 +113,10 @@ def _optimize_sd_pipeline( Args: source_dir (Path): Root of input directory of stable diffusion onnx pipeline with float32 models. target_dir (Path): Root of output directory of stable diffusion onnx pipeline with optimized models. + model_list (List[str]): list of directory names with onnx model. use_external_data_format (Optional[bool]): use external data format. float16 (bool): use half precision + bfloat16 (bool): use bfloat16 as fallback if float16 is also provided. force_fp32_ops(List[str]): operators that are forced to run in float32. enable_runtime_optimization(bool): run graph optimization using Onnx Runtime. @@ -69,12 +124,15 @@ def _optimize_sd_pipeline( RuntimeError: input onnx model does not exist RuntimeError: output onnx model path existed """ + is_flux_pipeline = pipeline_type == "flux" model_type_mapping = { + "transformer": "mmdit", "unet": "unet", "vae_encoder": "vae", "vae_decoder": "vae", "text_encoder": "clip", - "text_encoder_2": "clip", + "text_encoder_2": "t5" if is_flux_pipeline else "clip", + "text_encoder_3": "t5", # t5-v1_1-xxl is used in SD 3.x text_encoder_3 and Flux text_encoder_2. 
"safety_checker": "unet", } @@ -82,6 +140,8 @@ def _optimize_sd_pipeline( "unet": UnetOnnxModel, "vae": VaeOnnxModel, "clip": ClipOnnxModel, + "t5": T5OnnxModel, + "mmdit": MmditOnnxModel, } force_fp32_operators = { @@ -91,9 +151,140 @@ def _optimize_sd_pipeline( "text_encoder": [], "text_encoder_2": [], "safety_checker": [], + "text_encoder_3": [], + "transformer": [], + } + + # The node block list is generated by running the fp32 model and get statistics of node inputs and outputs. + # Nodes with any input or output of float or double data type, but value ouf of range of float16 are candidates. + # python optimize_pipeline.py -i ./flux1_schnell_onnx/fp32 -o ./flux1_schnell_onnx/fp32_opt + # export ORT_DEBUG_NODE_IO_DUMP_STATISTICS_DATA=1 + # export ORT_DEBUG_NODE_IO_DUMP_INPUT_DATA=1 + # export ORT_DEBUG_NODE_IO_DUMP_OUTPUT_DATA=1 + # python benchmark.py --height 1024 --width 1024 --steps 4 -b 1 -v Flux.1S -p flux1_schnell_onnx/fp32_opt -e optimum >stdout.txt 2>stderr.txt + # Warning: The node name might change in different export settings. See benchmark_flux.sh for the settings. + flux_node_block_list = { + "text_encoder_2": [ + "/encoder/block.10/layer.1/DenseReluDense/wo/MatMul", + "SkipLayerNorm_20", + "SkipLayerNorm_21", + "SkipLayerNorm_22", + "SkipLayerNorm_23", + "SkipLayerNorm_24", + "SkipLayerNorm_25", + "SkipLayerNorm_26", + "SkipLayerNorm_27", + "SkipLayerNorm_28", + "SkipLayerNorm_29", + "SkipLayerNorm_30", + "SkipLayerNorm_31", + "SkipLayerNorm_32", + "SkipLayerNorm_33", + "SkipLayerNorm_34", + "SkipLayerNorm_35", + "SkipLayerNorm_36", + "SkipLayerNorm_37", + "SkipLayerNorm_38", + "SkipLayerNorm_39", + "SkipLayerNorm_40", + "SkipLayerNorm_41", + "SkipLayerNorm_42", + "SkipLayerNorm_43", + "SkipLayerNorm_44", + "SkipLayerNorm_45", + "/encoder/block.23/layer.1/DenseReluDense/wo/MatMul", + "SkipLayerNorm_46", + ], + "vae_decoder": [ + "/decoder/mid_block/attentions.0/MatMul", + "/decoder/mid_block/attentions.0/Softmax", + ], + "transformer": [ + "/transformer_blocks.18/Mul_5", + "/transformer_blocks.18/Add_7", + "/Concat_1", + "LayerNorm_76", + "/single_transformer_blocks.0/Add", + "LayerNorm_77", + "/single_transformer_blocks.1/Add", + "LayerNorm_78", + "/single_transformer_blocks.2/Add", + "LayerNorm_79", + "/single_transformer_blocks.3/Add", + "LayerNorm_80", + "/single_transformer_blocks.4/Add", + "LayerNorm_81", + "/single_transformer_blocks.5/Add", + "LayerNorm_82", + "/single_transformer_blocks.6/Add", + "LayerNorm_83", + "/single_transformer_blocks.7/Add", + "LayerNorm_84", + "/single_transformer_blocks.8/Add", + "LayerNorm_85", + "/single_transformer_blocks.9/Add", + "LayerNorm_86", + "/single_transformer_blocks.10/Add", + "LayerNorm_87", + "/single_transformer_blocks.11/Add", + "LayerNorm_88", + "/single_transformer_blocks.12/Add", + "LayerNorm_89", + "/single_transformer_blocks.13/Add", + "LayerNorm_90", + "/single_transformer_blocks.14/Add", + "LayerNorm_91", + "/single_transformer_blocks.15/Add", + "LayerNorm_92", + "/single_transformer_blocks.16/Add", + "LayerNorm_93", + "/single_transformer_blocks.17/Add", + "LayerNorm_94", + "/single_transformer_blocks.18/Add", + "LayerNorm_95", + "/single_transformer_blocks.19/Add", + "LayerNorm_96", + "/single_transformer_blocks.20/Add", + "LayerNorm_97", + "/single_transformer_blocks.21/Add", + "LayerNorm_98", + "/single_transformer_blocks.22/Add", + "LayerNorm_99", + "/single_transformer_blocks.23/Add", + "LayerNorm_100", + "/single_transformer_blocks.24/Add", + "LayerNorm_101", + "/single_transformer_blocks.25/Add", + 
"LayerNorm_102", + "/single_transformer_blocks.26/Add", + "LayerNorm_103", + "/single_transformer_blocks.27/Add", + "LayerNorm_104", + "/single_transformer_blocks.28/Add", + "LayerNorm_105", + "/single_transformer_blocks.29/Add", + "LayerNorm_106", + "/single_transformer_blocks.30/Add", + "LayerNorm_107", + "/single_transformer_blocks.31/Add", + "LayerNorm_108", + "/single_transformer_blocks.32/Add", + "LayerNorm_109", + "/single_transformer_blocks.33/Add", + "LayerNorm_110", + "/single_transformer_blocks.34/Add", + "LayerNorm_111", + "/single_transformer_blocks.35/Add", + "LayerNorm_112", + "/single_transformer_blocks.36/Add", + "LayerNorm_113", + "/single_transformer_blocks.37/Add", + "/Shape", + "/Slice", + ], } - is_xl = (source_dir / "text_encoder_2").exists() + sd3_node_block_list = {"text_encoder_3": flux_node_block_list["text_encoder_2"]} if force_fp32_ops: for fp32_operator in force_fp32_ops: @@ -105,16 +296,21 @@ def _optimize_sd_pipeline( f"--force_fp32_ops shall be in the format of module:operator like unet:Attention, got {fp32_operator}" ) + op_counters = {} for name, model_type in model_type_mapping.items(): onnx_model_path = source_dir / name / "model.onnx" if not os.path.exists(onnx_model_path): - if name != "safety_checker": - logger.info("input onnx model does not exist: %s", onnx_model_path) + if name != "safety_checker" and name in model_list: + logger.warning("input onnx model does not exist: %s", onnx_model_path) # some model are optional so we do not raise error here. continue # Prepare output directory optimized_model_path = target_dir / name / "model.onnx" + if os.path.exists(optimized_model_path): + if not args.overwrite: + logger.warning("Skipped optimization since the target file existed: %s", optimized_model_path) + continue output_dir = optimized_model_path.parent output_dir.mkdir(parents=True, exist_ok=True) @@ -122,7 +318,7 @@ def _optimize_sd_pipeline( use_external_data_format = has_external_data(onnx_model_path) # Graph fusion before fp16 conversion, otherwise they cannot be fused later. - logger.info(f"Optimize {onnx_model_path}...") + logger.info("Optimize %s ...", onnx_model_path) args.model_type = model_type fusion_options = FusionOptions.parse(args) @@ -146,8 +342,28 @@ def _optimize_sd_pipeline( ) if float16: + model_node_block_list = ( + flux_node_block_list if is_flux_pipeline else sd3_node_block_list if pipeline_type == "sd3" else {} + ) + if name in model_node_block_list: + # Opset 12 does not support bfloat16. + # By default, optimum exports T5 model with opset 12. So we need to check the opset version. + use_bfloat16 = bfloat16 + if use_bfloat16: + for opset in m.model.opset_import: + if opset.domain in ["", "ai.onnx"] and opset.version < 13: + logger.warning( + "onnx model requires opset 13 or higher to use bfloat16. Fall back to float32." + ) + use_bfloat16 = False + + m.convert_float_to_float16( + keep_io_types=False, + node_block_list=model_node_block_list[name], + use_bfloat16_as_blocked_nodes_dtype=use_bfloat16, + ) # For SD-XL, use FP16 in VAE decoder will cause NaN and black image so we keep it in FP32. 
             # For SD-XL, using FP16 in the VAE decoder causes NaN and black images, so we keep it in FP32.
-            if is_xl and name == "vae_decoder":
+            elif pipeline_type in ["sdxl"] and name in ["vae_decoder"]:
                 logger.info("Skip converting %s to float16 to avoid NaN", name)
             else:
                 logger.info("Convert %s to float16 ...", name)
@@ -175,23 +391,26 @@ def _optimize_sd_pipeline(
             m = model_type_class_mapping[model_type](model)
 
         m.get_operator_statistics()
-        m.get_fused_operator_statistics()
+        op_counters[name] = m.get_fused_operator_statistics()
         m.save_model_to_file(str(optimized_model_path), use_external_data_format=use_external_data_format)
         logger.info("%s is optimized", name)
         logger.info("*" * 20)
 
+    return op_counters
+
 
-def _copy_extra_directory(source_dir: Path, target_dir: Path):
+def _copy_extra_directory(source_dir: Path, target_dir: Path, model_list: list[str]):
     """Copy extra directories that do not contain ONNX models
 
     Args:
         source_dir (Path): source directory
         target_dir (Path): target directory
+        model_list (List[str]): list of directory names that contain ONNX models.
 
     Raises:
         RuntimeError: source path does not exist
     """
-    extra_dirs = ["scheduler", "tokenizer", "tokenizer_2", "feature_extractor"]
+    extra_dirs = ["scheduler", "tokenizer", "tokenizer_2", "tokenizer_3", "feature_extractor"]
 
     for name in extra_dirs:
         source_path = source_dir / name
@@ -199,6 +418,8 @@ def _copy_extra_directory(source_dir: Path, target_dir: Path):
             continue
 
         target_path = target_dir / name
+        if target_path.exists():
+            shutil.rmtree(target_path)
         shutil.copytree(source_path, target_path)
         logger.info("%s => %s", source_path, target_path)
 
@@ -213,8 +434,7 @@ def _copy_extra_directory(source_dir: Path, target_dir: Path):
         logger.info("%s => %s", source_path, target_path)
 
     # Some directories are optional
-    onnx_model_dirs = ["text_encoder", "text_encoder_2", "unet", "vae_encoder", "vae_decoder", "safety_checker"]
-    for onnx_model_dir in onnx_model_dirs:
+    for onnx_model_dir in model_list:
         source_path = source_dir / onnx_model_dir / "config.json"
         target_path = target_dir / onnx_model_dir / "config.json"
         if source_path.exists():
@@ -227,7 +447,7 @@ def optimize_stable_diffusion_pipeline(
     input_dir: str,
     output_dir: str,
     overwrite: bool,
-    use_external_data_format: Optional[bool],
+    use_external_data_format: bool | None,
     float16: bool,
     enable_runtime_optimization: bool,
     args,
@@ -235,27 +455,31 @@ def optimize_stable_diffusion_pipeline(
     if os.path.exists(output_dir):
         if overwrite:
             shutil.rmtree(output_dir, ignore_errors=True)
-        else:
-            raise RuntimeError("output directory existed:{output_dir}. Add --overwrite to empty the directory.")
 
     source_dir = Path(input_dir)
     target_dir = Path(output_dir)
     target_dir.mkdir(parents=True, exist_ok=True)
 
-    _copy_extra_directory(source_dir, target_dir)
+    pipeline_type = _classify_pipeline_type(source_dir)
+    model_list = _get_model_list(pipeline_type)
 
-    _optimize_sd_pipeline(
+    _copy_extra_directory(source_dir, target_dir, model_list)
+
+    return _optimize_sd_pipeline(
         source_dir,
         target_dir,
+        pipeline_type,
+        model_list,
         use_external_data_format,
         float16,
+        args.bfloat16,
         args.force_fp32_ops,
         enable_runtime_optimization,
         args,
     )
 
 
-def parse_arguments(argv: Optional[List[str]] = None):
+def parse_arguments(argv: list[str] | None = None):
     """Parse arguments
 
     Returns:
@@ -283,10 +507,18 @@ def parse_arguments(argv: Optional[List[str]] = None):
         "--float16",
         required=False,
         action="store_true",
-        help="Output models of half or mixed precision.",
+        help="Output models of float16, except that some nodes fall back to float32 or bfloat16 to avoid overflow.",
     )
     parser.set_defaults(float16=False)
 
+    parser.add_argument(
+        "--bfloat16",
+        required=False,
+        action="store_true",
+        help="Allow bfloat16 as a fallback when --float16 is also provided.",
+    )
+    parser.set_defaults(bfloat16=False)
+
     parser.add_argument(
         "--force_fp32_ops",
         required=False,
@@ -337,10 +569,13 @@ def parse_arguments(argv: Optional[List[str]] = None):
     return args
 
 
-def main(argv: Optional[List[str]] = None):
+def main(argv: list[str] | None = None):
     args = parse_arguments(argv)
+    logger.info("Arguments: %s", str(args))
-    optimize_stable_diffusion_pipeline(
+
+    # Return op counters for testing purposes.
+    return optimize_stable_diffusion_pipeline(
         args.input, args.output, args.overwrite, args.use_external_data_format, args.float16, args.inspect, args
     )
 
diff --git a/onnxruntime/python/tools/transformers/models/stable_diffusion/pipeline_stable_diffusion.py b/onnxruntime/python/tools/transformers/models/stable_diffusion/pipeline_stable_diffusion.py
index 522cc541c1e57..ecfb3f0ef2e46 100644
--- a/onnxruntime/python/tools/transformers/models/stable_diffusion/pipeline_stable_diffusion.py
+++ b/onnxruntime/python/tools/transformers/models/stable_diffusion/pipeline_stable_diffusion.py
@@ -24,7 +24,7 @@
 import pathlib
 import random
 import time
-from typing import Any, Dict, List, Optional
+from typing import Any
 
 import numpy as np
 import nvtx
@@ -383,7 +383,7 @@ def tokenize(prompt, output_hidden_states):
             # Note: negative prompt embedding is not needed for SD XL when guidance <= 1
             if do_classifier_free_guidance:
                 # For SD XL base, handle force_zeros_for_empty_prompt
-                is_empty_negative_prompt = all([not i for i in negative_prompt])
+                is_empty_negative_prompt = all(not i for i in negative_prompt)
                 if force_zeros_for_empty_prompt and is_empty_negative_prompt:
                     uncond_embeddings = torch.zeros_like(text_embeddings)
                     if output_hidden_states:
@@ -485,7 +485,7 @@ def decode_latent(self, latents):
         self.stop_profile("vae")
         return images
 
-    def print_summary(self, tic, toc, batch_size, vae_enc=False, pil=False) -> Dict[str, Any]:
+    def print_summary(self, tic, toc, batch_size, vae_enc=False, pil=False) -> dict[str, Any]:
         throughput = batch_size / (toc - tic)
         latency_clip = cudart.cudaEventElapsedTime(self.events["clip-start"], self.events["clip-stop"])[1]
         latency_unet = cudart.cudaEventElapsedTime(self.events["denoise-start"], self.events["denoise-stop"])[1]
@@ -546,7 +546,7 @@ def pt_to_numpy(images: torch.FloatTensor):
         """
         return ((images + 1) / 2).clamp(0, 1).detach().permute(0, 2, 3, 1).float().cpu().numpy()
 
-    def 
metadata(self) -> Dict[str, Any]: + def metadata(self) -> dict[str, Any]: data = { "actual_steps": self.actual_steps, "seed": self.get_current_seed(), @@ -561,14 +561,14 @@ def metadata(self) -> Dict[str, Any]: return data - def save_images(self, images: List, prompt: List[str], negative_prompt: List[str], metadata: Dict[str, Any]): + def save_images(self, images: list, prompt: list[str], negative_prompt: list[str], metadata: dict[str, Any]): session_id = str(random.randint(1000, 9999)) for i, image in enumerate(images): seed = str(self.get_current_seed()) prefix = "".join(x for x in prompt[i] if x.isalnum() or x in ", -").replace(" ", "_")[:20] parts = [prefix, session_id, str(i + 1), str(seed), self.current_scheduler, str(self.actual_steps)] image_path = os.path.join(self.output_dir, "-".join(parts) + ".png") - print(f"Saving image {i+1} / {len(images)} to: {image_path}") + print(f"Saving image {i + 1} / {len(images)} to: {image_path}") from PIL import PngImagePlugin @@ -747,17 +747,17 @@ def _infer( def run( self, - prompt: List[str], - negative_prompt: List[str], + prompt: list[str], + negative_prompt: list[str], image_height: int, image_width: int, denoising_steps: int = 30, guidance: float = 5.0, - seed: Optional[int] = None, - image: Optional[torch.Tensor] = None, + seed: int | None = None, + image: torch.Tensor | None = None, strength: float = 0.3, - controlnet_images: Optional[torch.Tensor] = None, - controlnet_scales: Optional[torch.Tensor] = None, + controlnet_images: torch.Tensor | None = None, + controlnet_scales: torch.Tensor | None = None, show_latency: bool = False, output_type: str = "pil", deterministic: bool = False, diff --git a/onnxruntime/python/tools/transformers/models/stable_diffusion/test/check_image.py b/onnxruntime/python/tools/transformers/models/stable_diffusion/test/check_image.py index 86477a7e3168b..ab3d3d8f58545 100644 --- a/onnxruntime/python/tools/transformers/models/stable_diffusion/test/check_image.py +++ b/onnxruntime/python/tools/transformers/models/stable_diffusion/test/check_image.py @@ -1,6 +1,5 @@ import argparse import os -from typing import Optional import cv2 import open_clip @@ -19,7 +18,7 @@ def arg_parser(): return args -def image_encoder(img: Image.Image, cache_dir: Optional[str] = None): # -> torch.Tensor: +def image_encoder(img: Image.Image, cache_dir: str | None = None): # -> torch.Tensor: device = "cuda" if torch.cuda.is_available() else "cpu" model, _, preprocess = open_clip.create_model_and_transforms( "ViT-B-16-plus-240", pretrained="laion400m_e32", cache_dir=cache_dir @@ -46,7 +45,7 @@ def load_image(image_path: str): # -> Image.Image: return img -def generate_score(image1: str, image2: str, cache_dir: Optional[str] = None): # -> float: +def generate_score(image1: str, image2: str, cache_dir: str | None = None): # -> float: test_img = load_image(image1) data_img = load_image(image2) img1 = image_encoder(test_img, cache_dir) diff --git a/onnxruntime/python/tools/transformers/models/t5/past_helper.py b/onnxruntime/python/tools/transformers/models/t5/past_helper.py index 915b09da79fe6..0f72a89498dad 100644 --- a/onnxruntime/python/tools/transformers/models/t5/past_helper.py +++ b/onnxruntime/python/tools/transformers/models/t5/past_helper.py @@ -5,7 +5,6 @@ # -------------------------------------------------------------------------- import logging -from typing import List, Tuple import torch @@ -71,7 +70,7 @@ def group_by_layer(past, num_layers): ) @staticmethod - def back_group_by_layer(past_key_values: Tuple[Tuple[torch.Tensor]]): + 
def back_group_by_layer(past_key_values: tuple[tuple[torch.Tensor]]): """Categorize present_key_values from self and cross attention to layer by layer. Reorder past state from grouped by self/cross attention to grouped by layer. @@ -101,7 +100,7 @@ def back_group_by_layer(past_key_values: Tuple[Tuple[torch.Tensor]]): return past_tuples @staticmethod - def group_by_self_and_cross(present_key_values: Tuple[torch.Tensor], concat: bool = False): + def group_by_self_and_cross(present_key_values: tuple[torch.Tensor], concat: bool = False): """Categorize present_key_values into self and cross attention. Split present state from grouped by layer to grouped by self/cross attention. @@ -118,8 +117,8 @@ def group_by_self_and_cross(present_key_values: Tuple[torch.Tensor], concat: boo present_self (Tuple[torch.Tensor]): present key and values from self attention present_cross (Tuple[torch.Tensor]): present key and values from cross attention """ - present_self: List[torch.Tensor] = [] - present_cross: List[torch.Tensor] = [] + present_self: list[torch.Tensor] = [] + present_cross: list[torch.Tensor] = [] for _, present_layer_i in enumerate(present_key_values): assert len(present_layer_i) == 4, f"Expected to have four items. Got {len(present_layer_i)}" present_key_self, present_value_self, present_key_cross, present_value_cross = present_layer_i @@ -131,7 +130,7 @@ def group_by_self_and_cross(present_key_values: Tuple[torch.Tensor], concat: boo return present_self, present_cross @staticmethod - def get_input_names(past_key_values: Tuple[Tuple[torch.Tensor]], encoder=True): + def get_input_names(past_key_values: tuple[tuple[torch.Tensor]], encoder=True): """Process input names of model wrapper. Args: diff --git a/onnxruntime/python/tools/transformers/models/t5/t5_decoder.py b/onnxruntime/python/tools/transformers/models/t5/t5_decoder.py index 19e6bba22dc1a..a93c1705b2cd9 100644 --- a/onnxruntime/python/tools/transformers/models/t5/t5_decoder.py +++ b/onnxruntime/python/tools/transformers/models/t5/t5_decoder.py @@ -8,7 +8,6 @@ import os import tempfile from pathlib import Path -from typing import List, Optional, Union import numpy import onnx @@ -34,8 +33,8 @@ def __init__( self, decoder: torch.nn.Module, lm_head: torch.nn.Module, - config: Union[T5Config, MT5Config], - decoder_start_token_id: Optional[int] = None, + config: T5Config | MT5Config, + decoder_start_token_id: int | None = None, ): super().__init__() self.decoder = decoder @@ -133,11 +132,11 @@ def __init__( ): self.decoder_input_ids: torch.LongTensor = decoder_input_ids self.encoder_attention_mask: torch.LongTensor = encoder_attention_mask - self.past_key_values: Union[List[torch.FloatTensor], List[torch.HalfTensor], None] = past_key_values + self.past_key_values: list[torch.FloatTensor] | list[torch.HalfTensor] | None = past_key_values @staticmethod def create_dummy( - config: Union[T5Config, MT5Config], + config: T5Config | MT5Config, batch_size: int, encode_sequence_length: int, past_decode_sequence_length: int, @@ -211,7 +210,7 @@ def create_dummy( return T5DecoderInputs(decoder_input_ids, encoder_inputs.attention_mask, past) - def to_list(self) -> List: + def to_list(self) -> list: input_list = [ self.decoder_input_ids, self.encoder_attention_mask, @@ -232,7 +231,7 @@ def to_fp32(self): class T5DecoderHelper: @staticmethod def export_onnx( - decoder: Union[T5Decoder, T5DecoderInit], + decoder: T5Decoder | T5DecoderInit, device: torch.device, onnx_model_path: str, verbose: bool = True, @@ -370,7 +369,7 @@ def 
onnxruntime_inference(ort_session, inputs: T5DecoderInputs): @staticmethod def verify_onnx( - model: Union[T5Decoder, T5DecoderInit], + model: T5Decoder | T5DecoderInit, ort_session: InferenceSession, device: torch.device, use_int32_inputs: bool, diff --git a/onnxruntime/python/tools/transformers/models/t5/t5_encoder.py b/onnxruntime/python/tools/transformers/models/t5/t5_encoder.py index fb61e970c1e0c..c6b0f7ee3adc2 100644 --- a/onnxruntime/python/tools/transformers/models/t5/t5_encoder.py +++ b/onnxruntime/python/tools/transformers/models/t5/t5_encoder.py @@ -9,7 +9,6 @@ import random import tempfile from pathlib import Path -from typing import List, Union import numpy import onnx @@ -26,7 +25,7 @@ class T5Encoder(torch.nn.Module): """T5 encoder outputs only the last hidden state""" - def __init__(self, encoder, config: Union[T5Config, MT5Config]): + def __init__(self, encoder, config: T5Config | MT5Config): super().__init__() self.encoder = encoder self.config = config @@ -72,7 +71,7 @@ def create_dummy( attention_mask[i, :padding_position] = 0 return T5EncoderInputs(input_ids, attention_mask) - def to_list(self) -> List: + def to_list(self) -> list: input_list = [v for v in [self.input_ids, self.attention_mask] if v is not None] return input_list diff --git a/onnxruntime/python/tools/transformers/models/t5/t5_encoder_decoder_init.py b/onnxruntime/python/tools/transformers/models/t5/t5_encoder_decoder_init.py index fd6ea45ef8b7c..c76d7aabdf11a 100644 --- a/onnxruntime/python/tools/transformers/models/t5/t5_encoder_decoder_init.py +++ b/onnxruntime/python/tools/transformers/models/t5/t5_encoder_decoder_init.py @@ -8,7 +8,6 @@ import os import tempfile from pathlib import Path -from typing import List, Optional, Union import numpy import onnx @@ -33,8 +32,8 @@ def __init__( encoder: torch.nn.Module, decoder: torch.nn.Module, lm_head: torch.nn.Module, - config: Union[T5Config, MT5Config], - decoder_start_token_id: Optional[int] = None, + config: T5Config | MT5Config, + decoder_start_token_id: int | None = None, ): super().__init__() self.config = config @@ -62,7 +61,7 @@ def __init__(self, encoder_input_ids, encoder_attention_mask, decoder_input_ids= @staticmethod def create_dummy( - config: Union[T5Config, MT5Config], + config: T5Config | MT5Config, batch_size: int, encode_sequence_length: int, use_decoder_input_ids: int, @@ -83,7 +82,7 @@ def create_dummy( return T5EncoderDecoderInitInputs(encoder_inputs.input_ids, encoder_inputs.attention_mask, decoder_input_ids) - def to_list(self) -> List: + def to_list(self) -> list: input_list = [self.encoder_input_ids, self.encoder_attention_mask] if self.decoder_input_ids is not None: input_list.append(self.decoder_input_ids) diff --git a/onnxruntime/python/tools/transformers/models/t5/t5_helper.py b/onnxruntime/python/tools/transformers/models/t5/t5_helper.py index f7dc9db0e82c8..d3f25e979887d 100755 --- a/onnxruntime/python/tools/transformers/models/t5/t5_helper.py +++ b/onnxruntime/python/tools/transformers/models/t5/t5_helper.py @@ -7,7 +7,6 @@ import logging import os from pathlib import Path -from typing import Dict, List, Union import torch from float16 import float_to_float16_max_diff @@ -64,7 +63,7 @@ def load_model( merge_encoder_and_decoder_init: bool = True, model_type: str = "t5", state_dict_path: str = "", - ) -> Dict[str, torch.nn.Module]: + ) -> dict[str, torch.nn.Module]: """Load model given a pretrained name or path, then build models for ONNX conversion. 
Args: @@ -111,7 +110,7 @@ def load_model( @staticmethod def export_onnx( - model: Union[T5Encoder, T5Decoder, T5DecoderInit, T5EncoderDecoderInit], + model: T5Encoder | T5Decoder | T5DecoderInit | T5EncoderDecoderInit, device: torch.device, onnx_model_path: str, verbose: bool = True, @@ -151,7 +150,7 @@ def export_onnx( @staticmethod def auto_mixed_precision( onnx_model: OnnxModel, - op_block_list: List[str] = [ # noqa: B006 + op_block_list: list[str] = [ # noqa: B006 "SimplifiedLayerNormalization", "SkipSimplifiedLayerNormalization", "Relu", @@ -257,7 +256,7 @@ def optimize_onnx( @staticmethod def verify_onnx( - model: Union[T5Encoder, T5Decoder, T5DecoderInit, T5EncoderDecoderInit], + model: T5Encoder | T5Decoder | T5DecoderInit | T5EncoderDecoderInit, ort_session: InferenceSession, device: torch.device, use_int32_inputs: bool, diff --git a/onnxruntime/python/tools/transformers/models/whisper/benchmark.py b/onnxruntime/python/tools/transformers/models/whisper/benchmark.py index 3f7a292a02748..a111db1edc257 100644 --- a/onnxruntime/python/tools/transformers/models/whisper/benchmark.py +++ b/onnxruntime/python/tools/transformers/models/whisper/benchmark.py @@ -331,7 +331,7 @@ def gen_and_dec(inputs): def run_ort_inference(args, inputs, model): def prepare_ort_inputs(inputs, warmup=False): # Check that all model inputs will be provided - model_inputs = set(map(lambda model_input: model_input.name, model.get_inputs())) + model_inputs = {model_input.name for model_input in model.get_inputs()} user_inputs = set(inputs.keys()) missing_inputs = model_inputs - user_inputs if len(missing_inputs): @@ -593,7 +593,7 @@ def main(): model = get_model(args) if args.benchmark_type == "ort": # Check for optional inputs that could have been added during export - ort_model_inputs = set(map(lambda model_input: model_input.name, model.get_inputs())) + ort_model_inputs = {model_input.name for model_input in model.get_inputs()} args.has_audio_stream = "audio_stream" in ort_model_inputs setattr(args, "has_decoder_input_ids", "decoder_input_ids" in ort_model_inputs) # noqa: B010 setattr(args, "has_logits_processor", "logits_processor" in ort_model_inputs) # noqa: B010 diff --git a/onnxruntime/python/tools/transformers/models/whisper/benchmark_all.py b/onnxruntime/python/tools/transformers/models/whisper/benchmark_all.py index b7f5c2294f395..c84ac81606634 100644 --- a/onnxruntime/python/tools/transformers/models/whisper/benchmark_all.py +++ b/onnxruntime/python/tools/transformers/models/whisper/benchmark_all.py @@ -372,9 +372,7 @@ def main(): # Calculate forced decoder input ids hf_forced_decoder_ids = processor.get_decoder_prompt_ids(language=args.language, task=args.task) - ort_forced_decoder_ids = [config.decoder_start_token_id] + list( # noqa: RUF005 - map(lambda token_id: token_id[1], hf_forced_decoder_ids) - ) + ort_forced_decoder_ids = [config.decoder_start_token_id] + [token_id[1] for token_id in hf_forced_decoder_ids] hf_decoder_input_ids_cmd = ( ["--decoder-input-ids", str(hf_forced_decoder_ids)] if args.language and args.task else [] ) diff --git a/onnxruntime/python/tools/transformers/models/whisper/whisper_chain.py b/onnxruntime/python/tools/transformers/models/whisper/whisper_chain.py index 87ac45101f0c0..feb688948d8f5 100644 --- a/onnxruntime/python/tools/transformers/models/whisper/whisper_chain.py +++ b/onnxruntime/python/tools/transformers/models/whisper/whisper_chain.py @@ -24,7 +24,7 @@ def verify_inputs(beam_inputs, graph_inputs): # Verify that ONNX graph's inputs match beam search op's inputs 
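     # Optional beam search inputs appear as empty strings in beam_inputs; the filter below drops them before comparing.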
beam_required_inputs = list(filter(lambda beam_input: beam_input, beam_inputs)) assert len(graph_inputs) == len(beam_required_inputs) - for graph_input, beam_input in zip(graph_inputs, beam_required_inputs): + for graph_input, beam_input in zip(graph_inputs, beam_required_inputs, strict=False): # Check if graph_input is in beam_input to handle beam_input names with the "_fp16" suffix assert graph_input.name in beam_input diff --git a/onnxruntime/python/tools/transformers/models/whisper/whisper_decoder.py b/onnxruntime/python/tools/transformers/models/whisper/whisper_decoder.py index 5da235d72ca0b..400cafc4c93c3 100644 --- a/onnxruntime/python/tools/transformers/models/whisper/whisper_decoder.py +++ b/onnxruntime/python/tools/transformers/models/whisper/whisper_decoder.py @@ -8,7 +8,6 @@ import os import tempfile from pathlib import Path -from typing import List, Optional, Union import numpy import onnx @@ -34,7 +33,7 @@ def __init__( self, decoder: torch.nn.Module, config: WhisperConfig, - decoder_start_token_id: Optional[int] = None, + decoder_start_token_id: int | None = None, ): super().__init__() self.decoder = decoder @@ -115,7 +114,7 @@ def __init__( past_key_values=None, ): self.decoder_input_ids: torch.LongTensor = decoder_input_ids - self.past_key_values: Union[List[torch.FloatTensor], List[torch.HalfTensor], None] = past_key_values + self.past_key_values: list[torch.FloatTensor] | list[torch.HalfTensor] | None = past_key_values @staticmethod def create_dummy( @@ -186,7 +185,7 @@ def create_dummy( return WhisperDecoderInputs(decoder_input_ids, past) - def to_list(self) -> List: + def to_list(self) -> list: input_list = [self.decoder_input_ids] if self.past_key_values: input_list.extend(self.past_key_values) @@ -333,7 +332,7 @@ def onnxruntime_inference(ort_session, inputs: WhisperDecoderInputs): @staticmethod def verify_onnx( - model: Union[WhisperDecoder, WhisperDecoderInit], + model: WhisperDecoder | WhisperDecoderInit, ort_session: InferenceSession, device: torch.device, use_int32_inputs: bool, diff --git a/onnxruntime/python/tools/transformers/models/whisper/whisper_encoder.py b/onnxruntime/python/tools/transformers/models/whisper/whisper_encoder.py index 93281848a5c9c..0b9db81486caa 100644 --- a/onnxruntime/python/tools/transformers/models/whisper/whisper_encoder.py +++ b/onnxruntime/python/tools/transformers/models/whisper/whisper_encoder.py @@ -8,7 +8,6 @@ import os import tempfile from pathlib import Path -from typing import List import numpy import onnx @@ -67,7 +66,7 @@ def create_dummy( ) return WhisperEncoderInputs(input_features) - def to_list(self) -> List: + def to_list(self) -> list: if self.input_ids is None: return [] return [self.input_ids] diff --git a/onnxruntime/python/tools/transformers/models/whisper/whisper_encoder_decoder_init.py b/onnxruntime/python/tools/transformers/models/whisper/whisper_encoder_decoder_init.py index fab2a2aa4c8a8..c7c7a7675c1a7 100644 --- a/onnxruntime/python/tools/transformers/models/whisper/whisper_encoder_decoder_init.py +++ b/onnxruntime/python/tools/transformers/models/whisper/whisper_encoder_decoder_init.py @@ -8,7 +8,6 @@ import os import tempfile from pathlib import Path -from typing import List, Optional import numpy import onnx @@ -34,7 +33,7 @@ def __init__( encoder: torch.nn.Module, decoder: torch.nn.Module, config: WhisperConfig, - decoder_start_token_id: Optional[int] = None, + decoder_start_token_id: int | None = None, model_impl: str = "hf", model: torch.nn.Module = None, ): @@ -94,7 +93,7 @@ def create_dummy( return 
WhisperEncoderDecoderInitInputs(encoder_inputs.input_ids, decoder_input_ids) - def to_list(self) -> List: + def to_list(self) -> list: input_list = [self.encoder_input_ids] if self.decoder_input_ids is not None: input_list.append(self.decoder_input_ids) diff --git a/onnxruntime/python/tools/transformers/models/whisper/whisper_helper.py b/onnxruntime/python/tools/transformers/models/whisper/whisper_helper.py index 9fb51dd9b43c0..80d22185d9887 100644 --- a/onnxruntime/python/tools/transformers/models/whisper/whisper_helper.py +++ b/onnxruntime/python/tools/transformers/models/whisper/whisper_helper.py @@ -7,7 +7,6 @@ import logging import os from pathlib import Path -from typing import Dict, Tuple, Union import numpy as np import torch @@ -117,7 +116,7 @@ def load_model( device: torch.device, merge_encoder_and_decoder_init: bool = True, state_dict_path: str = "", - ) -> Dict[str, torch.nn.Module]: + ) -> dict[str, torch.nn.Module]: """Load model given a pretrained name or path, then build models for ONNX conversion. Args: @@ -170,7 +169,7 @@ def load_model( @staticmethod def export_onnx( - model: Union[WhisperEncoder, WhisperDecoder, WhisperDecoderInit, WhisperEncoderDecoderInit], + model: WhisperEncoder | WhisperDecoder | WhisperDecoderInit | WhisperEncoderDecoderInit, device: torch.device, onnx_model_path: str, verbose: bool = True, @@ -209,7 +208,7 @@ def export_onnx( @staticmethod def auto_mixed_precision( onnx_model: OnnxModel, - op_block_list: Tuple[str] = ( + op_block_list: tuple[str] = ( "SimplifiedLayerNormalization", "SkipSimplifiedLayerNormalization", "Relu", @@ -224,7 +223,7 @@ def auto_mixed_precision( Returns: parameters(dict): a dictionary of parameters used in float16 conversion """ - op_full_set = set([node.op_type for node in onnx_model.nodes()]) + op_full_set = {node.op_type for node in onnx_model.nodes()} fp32_op_set = set(op_block_list) fp16_op_set = op_full_set.difference(fp32_op_set) logger.info(f"fp32 op: {fp32_op_set} fp16 op: {fp16_op_set}") @@ -445,11 +444,11 @@ def verify_onnx( start_id = [config.decoder_start_token_id] # ex: [50258] prompt_ids = processor.get_decoder_prompt_ids(language="english", task="transcribe") - prompt_ids = list(map(lambda token: token[1], prompt_ids)) # ex: [50259, 50358, 50363] + prompt_ids = [token[1] for token in prompt_ids] # ex: [50259, 50358, 50363] forced_decoder_ids = start_id + prompt_ids # ex: [50258, 50259, 50358, 50363] - ort_names = list(map(lambda entry: entry.name, ort_session.get_inputs())) - ort_dtypes = list(map(lambda entry: entry.type, ort_session.get_inputs())) + ort_names = [entry.name for entry in ort_session.get_inputs()] + ort_dtypes = [entry.type for entry in ort_session.get_inputs()] ort_to_np = { "tensor(float)": np.float32, "tensor(float16)": np.float16, @@ -460,7 +459,7 @@ def verify_onnx( } use_extra_decoding_ids = "extra_decoding_ids" in ort_names - for name, dtype in zip(ort_names, ort_dtypes): + for name, dtype in zip(ort_names, ort_dtypes, strict=False): if name == "input_features": inputs[name] = inputs[name].detach().cpu().numpy() elif name == "vocab_mask": diff --git a/onnxruntime/python/tools/transformers/models/whisper/whisper_openai_helper.py b/onnxruntime/python/tools/transformers/models/whisper/whisper_openai_helper.py index 849c3059f21f7..8c78fb86a211e 100644 --- a/onnxruntime/python/tools/transformers/models/whisper/whisper_openai_helper.py +++ b/onnxruntime/python/tools/transformers/models/whisper/whisper_openai_helper.py @@ -33,7 +33,7 @@ def forward( remove_hooks=False, ): # Create a 
kv_cache for past_values - past_kv_cache = dict() + past_kv_cache = {} if past is not None: # Convert past values from 4D to 3D past = [torch.transpose(val, 1, 2) for val in past] diff --git a/onnxruntime/python/tools/transformers/onnx_model.py b/onnxruntime/python/tools/transformers/onnx_model.py index fe80a08829263..ef80d36be3b18 100644 --- a/onnxruntime/python/tools/transformers/onnx_model.py +++ b/onnxruntime/python/tools/transformers/onnx_model.py @@ -9,7 +9,6 @@ import sys from collections import deque from pathlib import Path -from typing import Dict, List, Optional, Tuple from float16 import convert_float_to_float16 from onnx import ( @@ -35,16 +34,16 @@ def __init__(self, model): def initialize(self, model): self.model: ModelProto = model - self._node_name_suffix: Dict[str, int] = {} # key is node name prefix, value is the last suffix generated + self._node_name_suffix: dict[str, int] = {} # key is node name prefix, value is the last suffix generated self.shape_infer_helper: SymbolicShapeInferenceHelper = None self.enable_shape_infer: bool = True - self.all_graphs: Optional[List[GraphProto]] = None + self.all_graphs: list[GraphProto] | None = None # Cache of shape and data type from onnx graph to speed up optimization. # Be careful that fusion shall not reuse node output name for different shape/type (in adding/removing nodes) # Note that these do not cache the symbolic shape inference result. - self._dtype_dict: Optional[Dict[str, int]] = None - self._shape_dict: Optional[Dict[str, List]] = None + self._dtype_dict: dict[str, int] | None = None + self._shape_dict: dict[str, list] | None = None def disable_shape_inference(self): self.enable_shape_infer = False @@ -233,15 +232,21 @@ def get_nodes_by_op_type(self, op_type): nodes.append(node) return nodes - def get_children(self, node, input_name_to_nodes=None): + def get_children(self, node, input_name_to_nodes=None, output_index=None): if input_name_to_nodes is None: input_name_to_nodes = self.input_name_to_nodes() children = [] - for output in node.output: - if output in input_name_to_nodes: - for node in input_name_to_nodes[output]: - children.append(node) # noqa: PERF402 + if output_index is not None: + if output_index < len(node.output): + output = node.output[output_index] + if output in input_name_to_nodes: + children = list(input_name_to_nodes[output]) + else: + for output in node.output: + if output in input_name_to_nodes: + children.extend(input_name_to_nodes[output]) + return children def get_parents(self, node, output_name_to_node=None): @@ -342,7 +347,7 @@ def match_parent( def match_parent_paths(self, node, paths, output_name_to_node): for i, path in enumerate(paths): - assert isinstance(path, (List, Tuple)) + assert isinstance(path, (list, tuple)) return_indice = [] matched = self.match_parent_path(node, path[0], path[1], output_name_to_node, return_indice) if matched: @@ -352,7 +357,7 @@ def match_parent_paths(self, node, paths, output_name_to_node): def match_parent_paths_all(self, node, paths, output_name_to_node): match_i, matches, return_indices = [], [], [] for i, path in enumerate(paths): - assert isinstance(path, (List, Tuple)) + assert isinstance(path, (list, tuple)) return_indice = [] matched = self.match_parent_path(node, path[0], path[1], output_name_to_node, return_indice) if matched: @@ -436,48 +441,63 @@ def match_child_path( self, node, child_op_types, - child_output_index=None, - return_indice=None, + edges: list[tuple[int, int]] | None = None, + input_name_to_nodes=None, exclude=[], # noqa: B006 ): 
""" Find a sequence of input edges based on constraints on parent op_type and index. - When input_index is None, we will find the first parent node based on constraints, - and return_indice will be appended the corresponding input index. + Note that we use greedy approach and only consider the first matched child, so it has chance to miss matching. Args: node (str): current node name. child_op_types (str): constraint of child node op_type of each input edge. - child_output_index (list): constraint of input index of each input edge. None means no constraint. - return_indice (list): a list to append the input index - When there is no constraint on input index of an edge. + edges (list): each edge is represented by two integers: output index of parent node, input index of child node. + None means no constraint. + exclude(list): list of nodes that are excluded (not allowed to match as child). Returns: children: a list of matched children node. """ - if child_output_index is not None: - assert len(child_output_index) == len(child_op_types) + if edges is not None: + assert len(edges) == len(child_op_types) + for edge in edges: + assert ( + isinstance(edge, tuple) and len(edge) == 2 and isinstance(edge[0], int) and isinstance(edge[1], int) + ) + + if input_name_to_nodes is None: + input_name_to_nodes = self.input_name_to_nodes() current_node = node matched_children = [] for i, op_type in enumerate(child_op_types): matched_child = None - node_children = self.get_children(current_node) - for child_i, child in enumerate(node_children): + + if edges is None: + children_nodes = self.get_children(current_node, input_name_to_nodes=input_name_to_nodes) + else: + children_nodes = self.get_children( + current_node, input_name_to_nodes=input_name_to_nodes, output_index=edges[i][0] + ) + + for child in children_nodes: if child.op_type == op_type and child not in exclude: - if child_output_index is not None and child_output_index[i] != child_i: - logger.debug( - f"Failed to match index={i} child_output_index={child_output_index[i]} op_type={op_type}", - stack_info=True, - ) - return None + if edges is not None and child.input[edges[i][1]] != current_node.output[edges[i][0]]: + continue + + # Here we use greedy approach and only consider the first matched child. + # TODO: match recursively if we encounter cases that the correct child is not the first matched. 
matched_child = child + break + if matched_child is None: - logger.debug(f"Failed to match child op_type={op_type}", stack_info=True) + logger.debug(f"Failed to match child {i} op_type={op_type}", stack_info=True) return None matched_children.append(matched_child) current_node = matched_child + return matched_children def find_first_parent_by_type(self, node, parent_type, output_name_to_node=None, recursive=True): @@ -579,7 +599,7 @@ def tensor_shape_to_list(self, tensor_type): shape_list.append("?") # shall not happen return shape_list - def get_dtype(self, name: str, symbolic_shape_helper: Optional[SymbolicShapeInferenceHelper] = None): + def get_dtype(self, name: str, symbolic_shape_helper: SymbolicShapeInferenceHelper | None = None): """Try get data type given a name (could be initializer, input or output of graph or node).""" if self._dtype_dict is None: @@ -604,7 +624,7 @@ def get_dtype(self, name: str, symbolic_shape_helper: Optional[SymbolicShapeInfe return None - def get_shape(self, name: str, symbolic_shape_helper: Optional[SymbolicShapeInferenceHelper] = None): + def get_shape(self, name: str, symbolic_shape_helper: SymbolicShapeInferenceHelper | None = None): """Try get shape given a name (could be initializer, input or output of graph or node).""" if self._shape_dict is None: @@ -1263,7 +1283,7 @@ def get_operator_statistics(self, include_domain=False): op_count[op] = 1 if op not in op_count else (op_count[op] + 1) # Sorted by count in the descending order, then by key in alphabetical order. - logger.info(f"Operators:{sorted(op_count.items(), key=lambda kv:(-kv[1], kv[0]))}") + logger.info(f"Operators:{sorted(op_count.items(), key=lambda kv: (-kv[1], kv[0]))}") return op_count @@ -1299,8 +1319,8 @@ def to_data_hash(tensor: TensorProto, base_dir: str = "") -> int: def has_same_value( tensor1: TensorProto, tensor2: TensorProto, - signature_cache1: Optional[dict] = None, - signature_cache2: Optional[dict] = None, + signature_cache1: dict | None = None, + signature_cache2: dict | None = None, ) -> bool: """Returns True when two tensors have same value. Note that name can be different. @@ -1333,7 +1353,7 @@ def has_same_value( return False - def remove_duplicated_initializer(self, cache: Optional[dict] = None): + def remove_duplicated_initializer(self, cache: dict | None = None): """Remove initializers with duplicated values, and only keep the first one. It could help reduce size of models (like ALBert) with shared weights. If require_raw_data passed, method will only compare raw_data initializers to speed runtime diff --git a/onnxruntime/python/tools/transformers/onnx_model_bart.py b/onnxruntime/python/tools/transformers/onnx_model_bart.py index 61a786d7af60b..496146dbf8cb5 100644 --- a/onnxruntime/python/tools/transformers/onnx_model_bart.py +++ b/onnxruntime/python/tools/transformers/onnx_model_bart.py @@ -3,7 +3,6 @@ # Licensed under the MIT License. 
# -------------------------------------------------------------------------- import logging -from typing import Optional from fusion_attention import AttentionMask from fusion_bart_attention import FusionBartAttention @@ -127,7 +126,7 @@ def __init__(self, model, num_heads, hidden_size, model_impl="hf"): self.attention_fusion = FusionBartAttention(self, self.hidden_size, self.num_heads, self.attention_mask) self.bart_reshape_fusion_preprocess = FusionBartReshape(self) - def optimize(self, options: Optional[FusionOptions] = None, add_dynamic_axes: bool = False): + def optimize(self, options: FusionOptions | None = None, add_dynamic_axes: bool = False): self.attention_fusion.use_multi_head_attention = False if options is None else options.use_multi_head_attention self.attention_fusion.disable_multi_head_attention_bias = ( False if options is None else options.disable_multi_head_attention_bias diff --git a/onnxruntime/python/tools/transformers/onnx_model_bert.py b/onnxruntime/python/tools/transformers/onnx_model_bert.py index 26464fc32817d..09a8017a102e2 100644 --- a/onnxruntime/python/tools/transformers/onnx_model_bert.py +++ b/onnxruntime/python/tools/transformers/onnx_model_bert.py @@ -4,7 +4,6 @@ # -------------------------------------------------------------------------- from logging import getLogger -from typing import List, Optional from convert_to_packing_mode import PackingMode from fusion_attention import AttentionMask, FusionAttention @@ -133,7 +132,7 @@ def fuse_rotary_embeddings(self): self.model.graph.node, ) ) - non_ms_domains_to_keep = set(map(lambda node: node.domain, rot_emb_nodes)) + non_ms_domains_to_keep = {node.domain for node in rot_emb_nodes} i = 0 while i < len(self.model.functions): fn = self.model.functions[i] @@ -147,7 +146,7 @@ def fuse_qordered_mamtul(self): fusion = FusionQOrderedMatMul(self) fusion.apply() - def get_graph_inputs_from_node_type(self, op_type: str, input_indices: List[int], casted: bool): + def get_graph_inputs_from_node_type(self, op_type: str, input_indices: list[int], casted: bool): """ Get graph inputs that feed into node type (like EmbedLayerNormalization or Attention). Returns a list of the graph input names based on the filter whether it is casted or not. @@ -323,7 +322,7 @@ def postprocess(self): self.clean_graph() self.prune_graph() - def optimize(self, options: Optional[FusionOptions] = None, add_dynamic_axes: bool = False): + def optimize(self, options: FusionOptions | None = None, add_dynamic_axes: bool = False): if (options is not None) and not options.enable_shape_inference: self.disable_shape_inference() diff --git a/onnxruntime/python/tools/transformers/onnx_model_clip.py b/onnxruntime/python/tools/transformers/onnx_model_clip.py index 388d058c7856c..725be3c762e5a 100644 --- a/onnxruntime/python/tools/transformers/onnx_model_clip.py +++ b/onnxruntime/python/tools/transformers/onnx_model_clip.py @@ -27,6 +27,7 @@ def get_fused_operator_statistics(self): "Gelu", "LayerNormalization", "QuickGelu", + "BiasGelu", "SkipLayerNormalization", ] for op in ops: diff --git a/onnxruntime/python/tools/transformers/onnx_model_conformer.py b/onnxruntime/python/tools/transformers/onnx_model_conformer.py index 1506d85f53fd4..65723aabc2e18 100644 --- a/onnxruntime/python/tools/transformers/onnx_model_conformer.py +++ b/onnxruntime/python/tools/transformers/onnx_model_conformer.py @@ -3,7 +3,6 @@ # Licensed under the MIT License. 
# -------------------------------------------------------------------------- import logging -from typing import Optional from fusion_attention import AttentionMask from fusion_conformer_attention import FusionConformerAttention @@ -19,7 +18,7 @@ def __init__(self, model, num_heads, hidden_size): self.attention_mask = AttentionMask(self) self.attention_fusion = FusionConformerAttention(self, self.hidden_size, self.num_heads, self.attention_mask) - def optimize(self, options: Optional[FusionOptions] = None, add_dynamic_axes: bool = False): + def optimize(self, options: FusionOptions | None = None, add_dynamic_axes: bool = False): self.attention_fusion.use_multi_head_attention = False if options is None else options.use_multi_head_attention self.attention_fusion.disable_multi_head_attention_bias = ( False if options is None else options.disable_multi_head_attention_bias diff --git a/onnxruntime/python/tools/transformers/onnx_model_mmdit.py b/onnxruntime/python/tools/transformers/onnx_model_mmdit.py new file mode 100644 index 0000000000000..35a574129e78c --- /dev/null +++ b/onnxruntime/python/tools/transformers/onnx_model_mmdit.py @@ -0,0 +1,112 @@ +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. +# -------------------------------------------------------------------------- + +import logging + +from fusion_layernorm import FusionLayerNormalization +from fusion_mha_mmdit import FusionMultiHeadAttentionMMDit +from fusion_options import FusionOptions +from import_utils import is_installed +from onnx import ModelProto +from onnx_model_bert import BertOnnxModel + +logger = logging.getLogger(__name__) + + +class MmditOnnxModel(BertOnnxModel): + def __init__(self, model: ModelProto, num_heads: int = 0, hidden_size: int = 0): + """Initialize Multimodal Diffusion Transformer (MMDiT) ONNX Model. + + Args: + model (ModelProto): the ONNX model + num_heads (int, optional): number of attention heads. Defaults to 0 (detect the parameter automatically). + hidden_size (int, optional): hidden dimension. Defaults to 0 (detect the parameter automatically). + """ + assert (num_heads == 0 and hidden_size == 0) or (num_heads > 0 and hidden_size % num_heads == 0) + super().__init__(model, num_heads=num_heads, hidden_size=hidden_size) + + def postprocess(self): + self.prune_graph() + self.remove_unused_constant() + + def fuse_layer_norm(self): + layernorm_support_broadcast = True + logger.warning( + "The optimized model requires LayerNormalization with broadcast support. " + "Please use onnxruntime-gpu>=1.21 for inference." + ) + fusion = FusionLayerNormalization( + self, check_constant_and_dimension=not layernorm_support_broadcast, force=True + ) + fusion.apply() + + def fuse_multi_head_attention(self): + fusion = FusionMultiHeadAttentionMMDit(self) + fusion.apply() + + def optimize(self, options: FusionOptions | None = None, add_dynamic_axes: bool = False): + assert not add_dynamic_axes + + if is_installed("tqdm"): + import tqdm + from tqdm.contrib.logging import logging_redirect_tqdm + + with logging_redirect_tqdm(): + steps = 5 + progress_bar = tqdm.tqdm(range(steps), initial=0, desc="fusion") + self._optimize(options, progress_bar) + else: + logger.info("tqdm is not installed. 
Run optimization without progress bar")
+            self._optimize(options, None)
+
+    def _optimize(self, options: FusionOptions | None = None, progress_bar=None):
+        if (options is not None) and not options.enable_shape_inference:
+            self.disable_shape_inference()
+
+        # Remove cast nodes whose input and output have the same data type, based on symbolic shape inference.
+        self.utils.remove_useless_cast_nodes()
+        if progress_bar:
+            progress_bar.update(1)
+
+        if (options is None) or options.enable_layer_norm:
+            self.fuse_layer_norm()
+            self.fuse_simplified_layer_norm()
+        if progress_bar:
+            progress_bar.update(1)
+
+        if (options is None) or options.enable_gelu:
+            self.fuse_gelu()
+        if progress_bar:
+            progress_bar.update(1)
+
+        if (options is None) or options.enable_attention:
+            self.fuse_multi_head_attention()
+        if progress_bar:
+            progress_bar.update(1)
+
+        self.postprocess()
+        if progress_bar:
+            progress_bar.update(1)
+
+        logger.info(f"opset version: {self.get_opset_version()}")
+
+    def get_fused_operator_statistics(self):
+        """
+        Returns node count of fused operators.
+        """
+        op_count = {}
+        ops = [
+            "FastGelu",
+            "MultiHeadAttention",
+            "LayerNormalization",
+            "SimplifiedLayerNormalization",
+        ]
+
+        for op in ops:
+            nodes = self.get_nodes_by_op_type(op)
+            op_count[op] = len(nodes)
+
+        logger.info(f"Optimized operators:{op_count}")
+        return op_count
diff --git a/onnxruntime/python/tools/transformers/onnx_model_phi.py b/onnxruntime/python/tools/transformers/onnx_model_phi.py
index 5df765033578b..d2f10d0bc18af 100644
--- a/onnxruntime/python/tools/transformers/onnx_model_phi.py
+++ b/onnxruntime/python/tools/transformers/onnx_model_phi.py
@@ -4,7 +4,6 @@
 # --------------------------------------------------------------------------
 from logging import getLogger
-from typing import List, Optional
 
 import numpy as np
 from dynamo_onnx_helper import DynamoOnnxHelper
@@ -70,7 +69,7 @@ class Fission(Fusion):
     def __init__(
         self,
         model: OnnxModel,
-        nodes_to_find: List[str],
+        nodes_to_find: list[str],
     ):
         super().__init__(model, "DONOTUSE", nodes_to_find)
 
@@ -129,7 +128,7 @@ def replace_fp32_value_info(self, name, shape):
         self.model.graph().value_info.extend([new_value_info])
 
     def set_unique_name_and_add_nodes(
-        self, subgraph_nodes: List[NodeProto], layer_id: int, layer_known_edges_names: List[str]
+        self, subgraph_nodes: list[NodeProto], layer_id: int, layer_known_edges_names: list[str]
     ):
         for new_node in subgraph_nodes:
             for i, name in enumerate(new_node.input):
@@ -148,7 +147,7 @@ def set_unique_name_and_add_nodes(
             self.nodes_to_add.append(new_node)
             self.node_name_to_graph_name[new_node.name] = self.this_graph_name
 
-    def layernorm(self, inputs: List[str], outputs: List[str], prefix: str = ""):
+    def layernorm(self, inputs: list[str], outputs: list[str], prefix: str = ""):
         assert len(inputs) == 3
         assert len(outputs) == 1
         node = helper.make_node(
@@ -160,7 +159,7 @@ def layernorm(self, inputs: List[str], outputs: List[str], prefix: str = ""):
         )
         return [node]
 
-    def gemm(self, inputs: List[str], outputs: List[str], prefix: str = ""):
+    def gemm(self, inputs: list[str], outputs: list[str], prefix: str = ""):
         assert len(inputs) == 3
         assert len(outputs) == 1
         matmul = helper.make_node(
@@ -177,7 +176,7 @@ def gemm(self, inputs: List[str], outputs: List[str], prefix: str = ""):
         )
         return [matmul, add]
 
-    def rotary(self, inputs: List[str], outputs: List[str], prefix: str = "", rot_dim=32, num_heads=32):
+    def rotary(self, inputs: list[str], outputs: list[str], prefix: str = "", rot_dim=32, num_heads=32):
         assert len(inputs) 
== 4 assert len(outputs) == 1 node = helper.make_node( @@ -191,7 +190,7 @@ def rotary(self, inputs: List[str], outputs: List[str], prefix: str = "", rot_di ) return [node] - def fastgelu(self, inputs: List[str], outputs: List[str], prefix: str = ""): + def fastgelu(self, inputs: list[str], outputs: list[str], prefix: str = ""): assert len(inputs) == 1 assert len(outputs) == 1 node = helper.make_node( @@ -203,7 +202,7 @@ def fastgelu(self, inputs: List[str], outputs: List[str], prefix: str = ""): ) return [node] - def add(self, inputs: List[str], outputs: List[str], prefix: str = ""): + def add(self, inputs: list[str], outputs: list[str], prefix: str = ""): assert len(inputs) == 2 assert len(outputs) == 1 node = helper.make_node( @@ -214,7 +213,7 @@ def add(self, inputs: List[str], outputs: List[str], prefix: str = ""): ) return [node] - def mha(self, inputs: List[str], outputs: List[str], prefix: str = "", num_heads=32): + def mha(self, inputs: list[str], outputs: list[str], prefix: str = "", num_heads=32): assert len(inputs) == 8 assert len(outputs) == 3 node = helper.make_node( @@ -228,7 +227,7 @@ def mha(self, inputs: List[str], outputs: List[str], prefix: str = "", num_heads ) return [node] - def gqa(self, inputs: List[str], outputs: List[str], prefix: str = "", num_heads=32): + def gqa(self, inputs: list[str], outputs: list[str], prefix: str = "", num_heads=32): assert len(inputs) == 7 assert len(outputs) == 3 node = helper.make_node( @@ -242,7 +241,7 @@ def gqa(self, inputs: List[str], outputs: List[str], prefix: str = "", num_heads ) return [node] - def attention(self, inputs: List[str], outputs: List[str], prefix: str = "", num_heads=32): + def attention(self, inputs: list[str], outputs: list[str], prefix: str = "", num_heads=32): assert len(inputs) == 5 assert len(outputs) == 2 node = helper.make_node( @@ -260,8 +259,8 @@ def attention(self, inputs: List[str], outputs: List[str], prefix: str = "", num def paged_attn( self, - inputs: List[str], - outputs: List[str], + inputs: list[str], + outputs: list[str], prefix: str = "", num_heads=32, head_size=80, @@ -853,7 +852,7 @@ def __init__(self, model: ModelProto, num_heads: int, hidden_size: int): self.fission_transformer_layernorm = FissionTransformerLayerNormPhi(self) self.fission_transformer_embedding = FissionTransformerEmbeddingPhi(self) - def optimize(self, options: Optional[FusionOptions] = None, add_dynamic_axes: bool = False): + def optimize(self, options: FusionOptions | None = None, add_dynamic_axes: bool = False): assert options is not None attn_op_type = options.attention_op_type diff --git a/onnxruntime/python/tools/transformers/onnx_model_sam2.py b/onnxruntime/python/tools/transformers/onnx_model_sam2.py index ac608fb509a81..9d57081c4ce12 100644 --- a/onnxruntime/python/tools/transformers/onnx_model_sam2.py +++ b/onnxruntime/python/tools/transformers/onnx_model_sam2.py @@ -4,7 +4,6 @@ # -------------------------------------------------------------------------- import logging -from typing import Optional from fusion_attention_sam2 import FusionMultiHeadAttentionSam2 from fusion_layernorm import FusionLayerNormalizationNCHW @@ -39,11 +38,11 @@ def fuse_layer_norm(self): fusion = FusionLayerNormalizationNCHW(self) fusion.apply() - def fuse_multi_head_attention(self, options: Optional[FusionOptions] = None): + def fuse_multi_head_attention(self, options: FusionOptions | None = None): mha_fusion = FusionMultiHeadAttentionSam2(self, self.hidden_size, self.num_heads) mha_fusion.apply() - def optimize(self, options: 
Optional[FusionOptions] = None, add_dynamic_axes: bool = False): + def optimize(self, options: FusionOptions | None = None, add_dynamic_axes: bool = False): if is_installed("tqdm"): import tqdm from tqdm.contrib.logging import logging_redirect_tqdm @@ -56,7 +55,7 @@ def optimize(self, options: Optional[FusionOptions] = None, add_dynamic_axes: bo logger.info("tqdm is not installed. Run optimization without progress bar") self._optimize(options, None) - def _optimize(self, options: Optional[FusionOptions] = None, progress_bar=None): + def _optimize(self, options: FusionOptions | None = None, progress_bar=None): if (options is not None) and not options.enable_shape_inference: self.disable_shape_inference() diff --git a/onnxruntime/python/tools/transformers/onnx_model_t5.py b/onnxruntime/python/tools/transformers/onnx_model_t5.py index 9cc4878e8022d..33dcc7795a465 100644 --- a/onnxruntime/python/tools/transformers/onnx_model_t5.py +++ b/onnxruntime/python/tools/transformers/onnx_model_t5.py @@ -3,7 +3,6 @@ # Licensed under the MIT License. # -------------------------------------------------------------------------- import logging -from typing import Optional, Union import numpy as np from fusion_attention import AttentionMask, FusionAttention @@ -50,8 +49,8 @@ def create_attention_node( input: str, output: str, add_qk_str: str, - scale: Optional[float] = None, - ) -> Union[NodeProto, None]: + scale: float | None = None, + ) -> NodeProto | None: """Create an Attention node. Args: mask_index (str): mask input @@ -75,9 +74,10 @@ def create_attention_node( k_weight = self.model.get_initializer(k_matmul.input[1]) v_weight = self.model.get_initializer(v_matmul.input[1]) - if q_weight is None: + if q_weight is None or k_weight is None or v_weight is None: + matmul = q_matmul if q_weight is None else k_matmul if k_weight is None else v_matmul print( - f"{q_matmul.input[1]} is not an initializer. " + f"{matmul.input[1]} is not an initializer. 
" "Please set do_constant_folding=True in torch.onnx.export to unblock attention fusion" ) return None @@ -162,7 +162,7 @@ def create_mha_node( present_value: str, num_heads: int, hidden_size: int, - ) -> Union[NodeProto, None]: + ) -> NodeProto | None: assert num_heads > 0 if hidden_size > 0 and (hidden_size % num_heads) != 0: @@ -222,9 +222,7 @@ def fuse_t5_encoder(self, normalize_node, input_name_to_nodes, output_name_to_no return qkv_nodes = self.model.match_parent_path( - normalize_node, - ["MatMul", "Reshape", "Transpose", "MatMul"], - [1, 0, 0, 0], + normalize_node, ["MatMul", "Reshape", "Transpose", "MatMul"], [1, 0, 0, 0], output_name_to_node ) if qkv_nodes is None: return @@ -235,6 +233,7 @@ def fuse_t5_encoder(self, normalize_node, input_name_to_nodes, output_name_to_no reshape_qkv, ["Concat", "Unsqueeze", "Gather", "Shape"], [1, 0, 0, 0], + output_name_to_node, ) if qkv_shape_nodes is None: return @@ -244,6 +243,7 @@ def fuse_t5_encoder(self, normalize_node, input_name_to_nodes, output_name_to_no matmul_qkv, ["Transpose", "Reshape", "MatMul"], [1, 0, 0], + output_name_to_node, ) if v_nodes is None: return @@ -254,28 +254,64 @@ def fuse_t5_encoder(self, normalize_node, input_name_to_nodes, output_name_to_no matmul_qkv, ["Softmax", "Add", "MatMul"], [0, 0, 0], + output_name_to_node, ) if qk_nodes is None: return _, add_qk, matmul_qk = qk_nodes - mask_index = None mask_nodes = self.model.match_parent_path( add_qk, ["Add", "Mul", "Sub", "Cast", "Unsqueeze", "Unsqueeze"], [1, 1, 0, 1, 0, 0], + output_name_to_node, ) + + is_pattern_for_one_graph_input = mask_nodes is None if mask_nodes is None: - return - mul_node = mask_nodes[1] - if mask_nodes[1].op_type != "Mul": - return + # Pattern for SD3 and Flux. + mask_nodes = self.model.match_parent_path( + add_qk, + ["Add", "Slice", "Mul", "Sub", "Unsqueeze", "Unsqueeze"], + [1, 1, 0, 0, 1, 0], + output_name_to_node, + ) + if mask_nodes is None: + return + mul_node = mask_nodes[2] + else: + mul_node = mask_nodes[1] _, mul_val = self.model.get_constant_input(mul_node) - if mul_val != -10000: - self.mask_filter_value = mul_val + if mul_val is None: + return - mask_index = self.attention_mask.process_mask(mask_nodes[-1].input[0]) + if mul_val != -10000: + self.mask_filter_value = float(mul_val) + + # If the mask is derived from shape of input_ids, it means there is no padding mask. + mask_nodes_2 = self.model.match_parent_path( + mask_nodes[-1], + ["ConstantOfShape", "Concat", "Unsqueeze", "Gather", "Shape"], + [0, 0, 0, 0, 0], + output_name_to_node, + ) + mask_nodes_3 = self.model.match_parent_path( + mask_nodes[-1], + ["ConstantOfShape", "Concat", "Unsqueeze", "Gather", "Shape"], + [0, 0, 1, 0, 0], + output_name_to_node, + ) + if ( + mask_nodes_2 is not None + and any(input.name == mask_nodes_2[-1].input[0] for input in self.model.graph().input) + and mask_nodes_3 is not None + and mask_nodes_2[-1].input[0] == mask_nodes_3[-1].input[0] + and len(mask_nodes_2[1].input) == 2 + ): + mask_index = "" + else: + mask_index = self.attention_mask.process_mask(mask_nodes[-1].input[0]) res_pos_bias = None rpb_nodes = self.model.match_parent_path( @@ -283,10 +319,17 @@ def fuse_t5_encoder(self, normalize_node, input_name_to_nodes, output_name_to_no ["Add", "RelativePositionBias"], [1, 0], ) + if rpb_nodes is None and is_pattern_for_one_graph_input: + # Pattern for SD3 and Flux. 
+ rpb_nodes = self.model.match_parent_path( + add_qk, + ["Add", "Slice", "RelativePositionBias"], + [1, 0, 0], + ) if rpb_nodes is None: return - rpb_add_node = rpb_nodes[0] - res_pos_bias = rpb_add_node.input[0] + + res_pos_bias = rpb_nodes[-1].output[0] k_nodes = self.model.match_parent_path( matmul_qk, @@ -332,13 +375,7 @@ def fuse_t5_encoder(self, normalize_node, input_name_to_nodes, output_name_to_no self.nodes_to_add.append(new_node) self.node_name_to_graph_name[new_node.name] = self.this_graph_name - self.nodes_to_remove.extend(qkv_nodes[1:]) - self.nodes_to_remove.extend(qk_nodes) - self.nodes_to_remove.extend(k_nodes[:-1]) - if v_nodes is not None: - self.nodes_to_remove.extend(v_nodes[:-1]) - self.nodes_to_remove.extend(q_nodes[:-1]) - + self.nodes_to_remove.append(reshape_qkv) self.prune_graph = True def fuse_t5_decoder(self, normalize_node, input_name_to_nodes, output_name_to_node): @@ -591,12 +628,7 @@ def fuse_t5_decoder(self, normalize_node, input_name_to_nodes, output_name_to_no self.nodes_to_add.append(new_node) self.node_name_to_graph_name[new_node.name] = self.this_graph_name - self.nodes_to_remove.extend(qkv_nodes[1:]) - self.nodes_to_remove.extend(qk_nodes) - self.nodes_to_remove.extend(k_nodes[:-1]) - if v_nodes is not None: - self.nodes_to_remove.extend(v_nodes[:-1]) - self.nodes_to_remove.extend(q_nodes[:-1]) + self.nodes_to_remove.append(reshape_qkv) self.prune_graph = True @@ -605,7 +637,6 @@ class FusionRelativePositionBiasBlock(Fusion): def __init__(self, model: OnnxModel, max_distance: int): super().__init__(model, "RelativePositionBias", ["Add", "Slice"]) self.max_distance = max_distance - # bidirectional=(not self.is_decoder) self.is_bidirectional = False def fuse(self, node, input_name_to_nodes, output_name_to_node): @@ -615,11 +646,11 @@ def fuse(self, node, input_name_to_nodes, output_name_to_node): return compute_bias_nodes = self.model.match_parent_path( - node, ["Unsqueeze", "Transpose", "Gather", "Where"], [0, 0, 0, 1] + node, ["Unsqueeze", "Transpose", "Gather", "Where"], [0, 0, 0, 1], output_name_to_node ) if compute_bias_nodes is None: compute_bias_nodes = self.model.match_parent_path( - node, ["Unsqueeze", "Transpose", "Gather", "Add", "Where"], [0, 0, 0, 1, 1] + node, ["Unsqueeze", "Transpose", "Gather", "Add", "Where"], [0, 0, 0, 1, 1], output_name_to_node ) if compute_bias_nodes is None: return @@ -632,20 +663,29 @@ def fuse(self, node, input_name_to_nodes, output_name_to_node): where, ["Min", "ConstantOfShape", "Shape", "Add", "Cast", "Mul", "Div", "Log", "Div"], [2, 1, 0, 0, 0, 0, 0, 0, 0], + output_name_to_node, ) if compute_buckets_nodes is None: return + # It is possible to deduce max_distance from a Div node: + # The value of self.model.get_constant_value(compute_buckets_nodes[-3].input[1]) is close to + # math.log(max_distance / (relative_attention_num_buckets // (4 if is_bidirectional else 2))) + # See https://github.com/huggingface/transformers/blob/608e163b527eaee41e650ffb9eb4c422d2679902/src/transformers/models/t5/modeling_t5.py#L397. + # Most T5 models use max_distance=128, so we hardcode it until we see a model with a different value. + # TODO: maybe add a sanity check here. 
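A sanity check like the TODO above suggests could invert the formula quoted in the comment. A minimal sketch, assuming the Hugging Face default relative_attention_num_buckets=32 (the helper name below is illustrative, not part of this code):

```python
import math

def deduce_max_distance(div_constant: float, num_buckets: int = 32, is_bidirectional: bool = False) -> int:
    # Invert div_constant ~= log(max_distance / (num_buckets // (4 if is_bidirectional else 2))).
    denominator = num_buckets // (4 if is_bidirectional else 2)
    return round(denominator * math.exp(div_constant))

# A decoder (denominator 32 // 2 = 16) with max_distance=128 stores log(128 / 16) in the Div constant:
assert deduce_max_distance(math.log(128 / 16)) == 128
```

The fusion could compare the deduced value against the hardcoded 128 and skip the fusion, or log a warning, when they disagree.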
+ div = compute_buckets_nodes[-1] range_nodes = self.model.match_parent_path( div, ["Cast", "Neg", "Min", "ConstantOfShape", "Shape", "Sub", "Unsqueeze", "Range"], [0, 0, 0, 1, 0, 0, 0, 0], + output_name_to_node, ) if range_nodes is None: range_nodes = self.model.match_parent_path( - div, ["Cast", "Abs", "Sub", "Unsqueeze", "Range"], [0, 0, 0, 0, 0] + div, ["Cast", "Abs", "Sub", "Unsqueeze", "Range"], [0, 0, 0, 0, 0], output_name_to_node ) self.is_bidirectional = True if range_nodes is None: @@ -653,17 +693,20 @@ def fuse(self, node, input_name_to_nodes, output_name_to_node): range_node = range_nodes[-1] - self.nodes_to_remove.extend(compute_bias_nodes) - self.nodes_to_remove.extend(compute_buckets_nodes) - self.nodes_to_remove.extend(range_nodes) + self.nodes_to_remove.append(unsqueeze) + self.prune_graph = True - node_name_prefix = "encoder" if self.is_bidirectional else "decoder" + node_name = self.model.create_node_name( + "RelativePositionBias", name_prefix="RelPosBias_" + ("encoder" if self.is_bidirectional else "decoder") + ) table_weight_i = self.model.get_initializer(gather.input[0]) + if table_weight_i is None: + return table_weight = NumpyHelper.to_array(table_weight_i) table_weight_t = np.transpose(table_weight) bias_table = helper.make_tensor( - name=self.model.create_node_name("bias_table_weight", name_prefix=node_name_prefix), + name=node_name + "_bias_table_weight", data_type=TensorProto.FLOAT, dims=[np.shape(table_weight)[0], np.shape(table_weight)[1]], vals=table_weight_t.tobytes(), @@ -677,7 +720,7 @@ def fuse(self, node, input_name_to_nodes, output_name_to_node): "RelativePositionBias", inputs=inputs, outputs=outputs, - name=self.model.create_node_name("RelativePositionBias", name_prefix=node_name_prefix), + name=node_name, ) rpb_node.domain = "com.microsoft" rpb_node.attribute.extend([helper.make_attribute("max_distance", self.max_distance)]) @@ -688,14 +731,19 @@ def fuse(self, node, input_name_to_nodes, output_name_to_node): class T5OnnxModel(BertOnnxModel): - def __init__(self, model, num_heads, hidden_size): + def __init__(self, model, num_heads: int = 0, hidden_size: int = 0): super().__init__(model, num_heads, hidden_size) self.attention_mask = AttentionMask(self) + + # When the model has only one input (input_ids), there is no padding mask. + if len(self.model.graph.input) == 1: + from fusion_options import AttentionMaskFormat + + self.attention_mask.mask_format = AttentionMaskFormat.NoMask + self.attention_fusion = FusionT5Attention(self, self.hidden_size, self.num_heads, self.attention_mask) self.layer_norm_fusion = FusionSimplifiedLayerNormalization(self) self.skip_layer_norm_fusion = FusionSkipSimplifiedLayerNormalization(self) - # TODO: consider retrieve max_distance from model. - # math.log(max_distance / (num_buckets // 2)) self.rpb_fusion = FusionRelativePositionBiasBlock(self, 128) def fuse_attention(self): @@ -704,9 +752,65 @@ def fuse_attention(self): self.attention_fusion.apply() def fuse_layer_norm(self): self.layer_norm_fusion.apply() - def fuse_skip_layer_norm(self): + def fuse_skip_layer_norm(self, shape_infer=True): self.skip_layer_norm_fusion.apply() + def adjust_rel_pos_bias_length_input(self): + # The T5 encoder uses complex logic to compute the query and key lengths when there is only one graph input (input_ids). + # We can get the length directly from the shape (the 2nd dimension) of input_ids. 
+ for node in self.nodes(): + if node.op_type == "RelativePositionBias": + nodes = self.match_parent_path( + node, + [ + "Gather", + "Shape", + "Transpose", + "Reshape", + "Concat", + "Unsqueeze", + "Gather", + "Shape", + "SimplifiedLayerNormalization", + "Gather", + ], + [1, 0, 0, 0, 1, 0, 0, 0, 0, 0], + ) + # TODO: more validation on node attributes + if nodes is not None: + graph_input_names = [input.name for input in self.model.graph.input] + if nodes[-1].input[1] in graph_input_names: + node_name = self.create_node_name("Shape", name_prefix="Added_Shape_") + shape_node = helper.make_node( + "Shape", + inputs=[nodes[-1].input[1]], + outputs=[node_name + "_Output"], + name=node_name, + ) + + indices_1 = helper.make_tensor( + name="Constant_Index_1", + data_type=TensorProto.INT64, + dims=[1], # Shape of the tensor + vals=[1], # Tensor values + ) + self.add_initializer(indices_1) + + gather = helper.make_node( + "Gather", + inputs=[node_name + "_Output", "Constant_Index_1"], + outputs=[node_name + "_Output_Gather_1"], + name=self.create_node_name("Gather", name_prefix="Added_Gather_"), + axis=0, + ) + + self.add_node(shape_node) + self.add_node(gather) + node.input[1] = node_name + "_Output_Gather_1" + node.input[2] = node_name + "_Output_Gather_1" + + break + # Remove get_extended_attention_mask() since it generates all zeros. def remove_extended_mask_decoder_init(self): nodes_to_remove = [] @@ -787,5 +891,6 @@ def postprocess(self): # remove get_extended_attention_mask() since it generates all zeros. self.remove_extended_mask_decoder_init() self.remove_extended_mask_decoder() + self.adjust_rel_pos_bias_length_input() self.prune_graph() diff --git a/onnxruntime/python/tools/transformers/onnx_model_tnlr.py b/onnxruntime/python/tools/transformers/onnx_model_tnlr.py index f5a47b19d67fc..125aa47a7dbed 100644 --- a/onnxruntime/python/tools/transformers/onnx_model_tnlr.py +++ b/onnxruntime/python/tools/transformers/onnx_model_tnlr.py @@ -3,7 +3,6 @@ # Licensed under the MIT License. 
# -------------------------------------------------------------------------- import logging -from typing import Union from fusion_attention import AttentionMask, FusionAttention from fusion_utils import NumpyHelper @@ -39,7 +38,7 @@ def create_attention_node( input: str, output: str, add_qk_str: str, - ) -> Union[NodeProto, None]: + ) -> NodeProto | None: assert num_heads > 0 if hidden_size > 0 and (hidden_size % num_heads) != 0: logger.debug(f"input hidden size {hidden_size} is not a multiple of num of heads {num_heads}") diff --git a/onnxruntime/python/tools/transformers/onnx_model_unet.py b/onnxruntime/python/tools/transformers/onnx_model_unet.py index 77e24986f0fde..e96cf32927171 100644 --- a/onnxruntime/python/tools/transformers/onnx_model_unet.py +++ b/onnxruntime/python/tools/transformers/onnx_model_unet.py @@ -4,7 +4,6 @@ # -------------------------------------------------------------------------- import logging -from typing import Optional from fusion_attention_unet import FusionAttentionUnet from fusion_bias_add import FusionBiasAdd @@ -91,7 +90,7 @@ def merge_adjacent_transpose(self): if total: logger.info("Removed %d Transpose nodes", total) - def fuse_multi_head_attention(self, options: Optional[FusionOptions] = None): + def fuse_multi_head_attention(self, options: FusionOptions | None = None): # Self Attention enable_packed_qkv = (options is None) or options.enable_packed_qkv self_attention_fusion = FusionAttentionUnet( @@ -120,7 +119,7 @@ def fuse_bias_add(self): fusion = FusionBiasAdd(self) fusion.apply() - def optimize(self, options: Optional[FusionOptions] = None): + def optimize(self, options: FusionOptions | None = None): if is_installed("tqdm"): import tqdm from tqdm.contrib.logging import logging_redirect_tqdm @@ -133,7 +132,7 @@ def optimize(self, options: Optional[FusionOptions] = None): logger.info("tqdm is not installed. 
Run optimization without progress bar") self._optimize(options, None) - def _optimize(self, options: Optional[FusionOptions] = None, progress_bar=None): + def _optimize(self, options: FusionOptions | None = None, progress_bar=None): if (options is not None) and not options.enable_shape_inference: self.disable_shape_inference() diff --git a/onnxruntime/python/tools/transformers/onnx_model_vae.py b/onnxruntime/python/tools/transformers/onnx_model_vae.py index de8b59074a871..1e531bbc3eff3 100644 --- a/onnxruntime/python/tools/transformers/onnx_model_vae.py +++ b/onnxruntime/python/tools/transformers/onnx_model_vae.py @@ -4,7 +4,6 @@ # -------------------------------------------------------------------------- from logging import getLogger -from typing import Optional from fusion_attention_vae import FusionAttentionVae from fusion_options import FusionOptions @@ -19,7 +18,7 @@ def __init__(self, model: ModelProto, num_heads: int = 0, hidden_size: int = 0): assert (num_heads == 0 and hidden_size == 0) or (num_heads > 0 and hidden_size % num_heads == 0) super().__init__(model, num_heads=num_heads, hidden_size=hidden_size) - def fuse_multi_head_attention(self, options: Optional[FusionOptions] = None): + def fuse_multi_head_attention(self, options: FusionOptions | None = None): # Self Attention self_attention_fusion = FusionAttentionVae(self, self.hidden_size, self.num_heads) self_attention_fusion.apply() diff --git a/onnxruntime/python/tools/transformers/onnx_utils.py b/onnxruntime/python/tools/transformers/onnx_utils.py index 64fade9369395..7f681d783cb64 100644 --- a/onnxruntime/python/tools/transformers/onnx_utils.py +++ b/onnxruntime/python/tools/transformers/onnx_utils.py @@ -35,7 +35,7 @@ def extract_raw_data_from_model(model: ModelProto): initializer.name = name initializer.ClearField("raw_data") - return zip(*external_data) + return zip(*external_data, strict=False) def has_external_data(model: ModelProto): diff --git a/onnxruntime/python/tools/transformers/optimizer.py b/onnxruntime/python/tools/transformers/optimizer.py index 933bd785dc00d..c4d187e8bf031 100644 --- a/onnxruntime/python/tools/transformers/optimizer.py +++ b/onnxruntime/python/tools/transformers/optimizer.py @@ -22,7 +22,6 @@ import os import tempfile from pathlib import Path -from typing import Dict, List, Optional, Union import coloredlogs from fusion_options import FusionOptions @@ -35,6 +34,7 @@ from onnx_model_clip import ClipOnnxModel from onnx_model_conformer import ConformerOnnxModel from onnx_model_gpt2 import Gpt2OnnxModel +from onnx_model_mmdit import MmditOnnxModel from onnx_model_phi import PhiOnnxModel from onnx_model_sam2 import Sam2OnnxModel from onnx_model_t5 import T5OnnxModel @@ -66,21 +66,22 @@ "unet": (UnetOnnxModel, "pytorch", 1), # UNet in Stable Diffusion "vae": (VaeOnnxModel, "pytorch", 1), # UAE in Stable Diffusion "vit": (BertOnnxModel, "pytorch", 1), + "mmdit": (MmditOnnxModel, "pytorch", 1), } def optimize_by_onnxruntime( - onnx_model: Optional[Union[str, ModelProto]] = None, + onnx_model: str | ModelProto | None = None, use_gpu: bool = False, - optimized_model_path: Optional[str] = None, - opt_level: Optional[int] = 99, - disabled_optimizers: List[str] = [], # noqa: B006 + optimized_model_path: str | None = None, + opt_level: int | None = 99, + disabled_optimizers: list[str] = [], # noqa: B006 verbose: bool = False, save_as_external_data: bool = False, external_data_filename: str = "", external_data_file_threshold: int = 1024, *, - provider: Optional[str] = None, + provider: str | None = None, 
**deprecated_kwargs, ) -> str: """ @@ -215,7 +216,7 @@ def optimize_by_fusion( model_type: str = "bert", num_heads: int = 0, hidden_size: int = 0, - optimization_options: Optional[FusionOptions] = None, + optimization_options: FusionOptions | None = None, ) -> OnnxModel: """Optimize Model by graph fusion logic. @@ -237,7 +238,9 @@ def optimize_by_fusion( Returns: object of an optimizer class. """ - if model_type not in ["bert", "swin", "unet", "vae", "clip", "sam2"] and (num_heads == 0 or hidden_size == 0): + if model_type not in ["bert", "t5", "swin", "unet", "vae", "clip", "sam2", "mmdit"] and ( + num_heads == 0 or hidden_size == 0 + ): logger.warning(f"Please specify parameters of num_heads and hidden_size for model_type {model_type}") if model_type not in MODEL_TYPES: @@ -270,17 +273,17 @@ def optimize_by_fusion( def optimize_model( - input: Union[str, ModelProto], + input: str | ModelProto, model_type: str = "bert", num_heads: int = 0, hidden_size: int = 0, - optimization_options: Optional[FusionOptions] = None, - opt_level: Optional[int] = None, + optimization_options: FusionOptions | None = None, + opt_level: int | None = None, use_gpu: bool = False, only_onnxruntime: bool = False, verbose: bool = False, *, - provider: Optional[str] = None, + provider: str | None = None, ) -> OnnxModel: """Optimize Model by OnnxRuntime and/or python fusion logic. @@ -410,7 +413,7 @@ def optimize_model( return optimizer -def get_fusion_statistics(optimized_model_path: str) -> Dict[str, int]: +def get_fusion_statistics(optimized_model_path: str) -> dict[str, int]: """ Get counter of fused operators in optimized model. diff --git a/onnxruntime/python/tools/transformers/quantize_helper.py b/onnxruntime/python/tools/transformers/quantize_helper.py index 6a25196dbc24c..9e44921bdeddf 100644 --- a/onnxruntime/python/tools/transformers/quantize_helper.py +++ b/onnxruntime/python/tools/transformers/quantize_helper.py @@ -64,7 +64,7 @@ def quantize_onnx_model(onnx_model_path, quantized_model_path, use_external_data from onnxruntime.quantization import quantize_dynamic Path(quantized_model_path).parent.mkdir(parents=True, exist_ok=True) - logger.info(f"Size of full precision ONNX model(MB):{os.path.getsize(onnx_model_path)/(1024*1024)}") + logger.info(f"Size of full precision ONNX model(MB):{os.path.getsize(onnx_model_path) / (1024 * 1024)}") quantize_dynamic( onnx_model_path, quantized_model_path, @@ -73,4 +73,4 @@ def quantize_onnx_model(onnx_model_path, quantized_model_path, use_external_data ) logger.info(f"quantized model saved to:{quantized_model_path}") # TODO: include external data in total model size. 
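The TODO above could be addressed by walking the initializers' external-data locations. A minimal sketch using onnx.external_data_helper (it only covers top-level graph initializers; subgraphs would need the same walk):

```python
import os

import onnx
from onnx.external_data_helper import uses_external_data

def total_model_size_mb(model_path: str) -> float:
    # Size of the .onnx file plus every distinct external-data file it references.
    size = os.path.getsize(model_path)
    model = onnx.load(model_path, load_external_data=False)
    seen = set()
    for tensor in model.graph.initializer:
        if uses_external_data(tensor):
            location = {e.key: e.value for e in tensor.external_data}.get("location")
            path = os.path.join(os.path.dirname(model_path), location or "")
            if location and path not in seen and os.path.exists(path):
                seen.add(path)
                size += os.path.getsize(path)
    return size / (1024 * 1024)
```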
- logger.info(f"Size of quantized ONNX model(MB):{os.path.getsize(quantized_model_path)/(1024*1024)}") + logger.info(f"Size of quantized ONNX model(MB):{os.path.getsize(quantized_model_path) / (1024 * 1024)}") diff --git a/onnxruntime/python/tools/transformers/shape_infer_helper.py b/onnxruntime/python/tools/transformers/shape_infer_helper.py index f1fc0c952e8e4..f4d65d05ad0c8 100644 --- a/onnxruntime/python/tools/transformers/shape_infer_helper.py +++ b/onnxruntime/python/tools/transformers/shape_infer_helper.py @@ -6,7 +6,6 @@ import logging import os import sys -from typing import Dict # In ORT Package the symbolic_shape_infer.py is in ../tools file_path = os.path.dirname(__file__) @@ -26,9 +25,9 @@ def __init__(self, model, verbose=0, int_max=2**31 - 1, auto_merge=True, guess_o self.model_ = model self.all_shapes_inferred_: bool = False self.is_inferred_: bool = False - self.dynamic_axis_mapping_: Dict[str, int] = {} + self.dynamic_axis_mapping_: dict[str, int] = {} - def infer(self, dynamic_axis_mapping: Dict[str, int], max_runs: int = 200): + def infer(self, dynamic_axis_mapping: dict[str, int], max_runs: int = 200): """Run shape inference, and try replace dynamic axis from string to integer when mapping is provided. Args: diff --git a/onnxruntime/python/tools/transformers/shape_optimizer.py b/onnxruntime/python/tools/transformers/shape_optimizer.py index 17fd54f19baf2..9f590dfb86911 100644 --- a/onnxruntime/python/tools/transformers/shape_optimizer.py +++ b/onnxruntime/python/tools/transformers/shape_optimizer.py @@ -16,7 +16,6 @@ from collections import deque # noqa: F401 from datetime import datetime from pathlib import Path # noqa: F401 -from typing import List, Optional import numpy as np import onnx @@ -271,7 +270,7 @@ def validate_input(self, input: str): valid_names = [input.name for input in self.model.graph.input] raise Exception(f"Input {input} does not exist in the graph inputs: {valid_names}") - def validate_outputs(self, output_names: List[str]): + def validate_outputs(self, output_names: list[str]): valid_names = [output.name for output in self.model.graph.output] for name in output_names: if name not in valid_names: @@ -285,7 +284,7 @@ def optimize( input_mask: str, enable_shape_opt: bool, enable_reshape_opt: bool, - output_names: Optional[List[str]] = None, + output_names: list[str] | None = None, batch_size=1, sequence_length=128, verbose=False, diff --git a/onnxruntime/python/torch_cpp_extensions/aten_op_executor/aten_op_executor.cc b/onnxruntime/python/torch_cpp_extensions/aten_op_executor/aten_op_executor.cc index f4d2f68d4d8b5..169ea313df531 100644 --- a/onnxruntime/python/torch_cpp_extensions/aten_op_executor/aten_op_executor.cc +++ b/onnxruntime/python/torch_cpp_extensions/aten_op_executor/aten_op_executor.cc @@ -57,7 +57,8 @@ struct ATenOperator { c10::IValue i_value; // Create the torch tensor from this DLPack no matter we need it or not below, // so that the dlpack's deleter will be triggered when torch tensor is out of scope. - at::Tensor tensor = at::fromDLPack(dlpack); + // work-around upstream pytorch changing fromDLPack to take non-const pointer + at::Tensor tensor = at::fromDLPack(const_cast(dlpack)); switch (elem_kinds[index]) { case c10::TypeKind::TensorType: { i_value = is_optional ? 
c10::IValue(c10::optional<at::Tensor>(tensor)) : c10::IValue(tensor); diff --git a/onnxruntime/test/contrib_ops/attention_lstm_data_gen.py b/onnxruntime/test/contrib_ops/attention_lstm_data_gen.py index 95b82df86b0fe..3abd6efb65ddd 100644 --- a/onnxruntime/test/contrib_ops/attention_lstm_data_gen.py +++ b/onnxruntime/test/contrib_ops/attention_lstm_data_gen.py @@ -413,7 +413,7 @@ sess.run(tf.Print(attention, [attention], "====Final Attention Context(bw)", summarize=10000)) for t in tensors: - shape_str = "[" + ",".join(list(map(lambda x: str(x.__int__()), t.get_shape()))) + "]" + shape_str = "[" + ",".join([str(x.__int__()) for x in t.get_shape()]) + "]" sess.run( tf.Print( t, diff --git a/onnxruntime/test/contrib_ops/fused_matmul_op_test.cc b/onnxruntime/test/contrib_ops/fused_matmul_op_test.cc index ad49560f526e0..db5ce1742e37c 100644 --- a/onnxruntime/test/contrib_ops/fused_matmul_op_test.cc +++ b/onnxruntime/test/contrib_ops/fused_matmul_op_test.cc @@ -222,10 +222,9 @@ TEST(FusedMatMulOpTest, FloatTypeNoTranspose) { } #if defined(USE_CUDA) || defined(USE_ROCM) // double support only implemented in CUDA/ROCM kernel -// CUDAExecutionProvider cannot be used with this model due to its ONNX opset not being supported by the layout transformer. -// TEST(FusedMatMulOpTest, DoubleTypeNoTranspose) { -// RunFusedMatMulTest<double>("FusedMatMul", 1); -// } +TEST(FusedMatMulOpTest, DoubleTypeNoTranspose) { + RunFusedMatMulTest<double>("FusedMatMul", 1); +} #endif TEST(FusedMatMulOpTest, FloatTypeTransposeA) { diff --git a/onnxruntime/test/contrib_ops/layer_norm_op_test.cc b/onnxruntime/test/contrib_ops/layer_norm_op_test.cc index 52e67bf0616d1..4611dc9082734 100644 --- a/onnxruntime/test/contrib_ops/layer_norm_op_test.cc +++ b/onnxruntime/test/contrib_ops/layer_norm_op_test.cc @@ -4,6 +4,7 @@ #include #include #include "core/framework/tensor.h" +#include "core/providers/cpu/nn/layer_norm_helper.h" +#include "core/session/inference_session.h" #include "test/common/dnnl_op_test_utils.h" #include "test/common/tensor_op_test_utils.h" @@ -20,6 +21,33 @@ using namespace std; namespace onnxruntime { namespace test { +// Some features (like broadcast support) are implemented only in the CPU and CUDA/ROCM providers. A helper to run tests. +void RunTestOnCpuAndCuda(OpTester& test, const std::string& expected_failure_msg = "") { + auto expected_result = expected_failure_msg.empty() + ? 
OpTester::ExpectResult::kExpectSuccess + : OpTester::ExpectResult::kExpectFailure; + + std::vector> cpu_execution_provider; + cpu_execution_provider.push_back(DefaultCpuExecutionProvider()); + test.Run(expected_result, expected_failure_msg, {}, nullptr, &cpu_execution_provider); + + constexpr int min_cuda_architecture = 0; + bool enable_cuda = HasCudaEnvironment(min_cuda_architecture); + bool enable_rocm = (nullptr != DefaultRocmExecutionProvider().get()); + if (enable_cuda || enable_rocm) { + std::vector> gpu_execution_provider; + if (enable_cuda) { + gpu_execution_provider.push_back(DefaultCudaExecutionProvider()); + } else if (enable_rocm) { + gpu_execution_provider.push_back(DefaultRocmExecutionProvider()); + } + + if (gpu_execution_provider.size() > 0) { + test.Run(expected_result, expected_failure_msg, {}, nullptr, &gpu_execution_provider); + } + } +} + TEST(LayerNormTest, BERTLayerNorm) { OpTester tester("LayerNormalization", 17 /*opset_version*/); tester.AddAttribute("axis", -1); @@ -210,6 +238,106 @@ TEST(LayerNormTest, LayerNorm_Scale_Bias_Float16ScaleBiasOutput) { kNnapiExecutionProvider, kQnnExecutionProvider, kCoreMLExecutionProvider, kWebGpuExecutionProvider}); } +TEST(LayerNormTest, LayerNorm_Scale_Bias_NoBroadcast) { + OpTester test("LayerNormalization"); + test.AddAttribute("epsilon", 1e-05f); + + std::vector dims{2, 2, 2}; + test.AddInput("x", dims, {-1.0f, 2.0f, 3.0f, -4.0f, -10.264f, 8.6453f, 43.1561f, -0.641239f}); + test.AddInput("gamma", {2, 2, 2}, {-0.1f, 1.7f, -0.6953f, 5.1824f, -0.1f, 1.7f, -0.6953f, 5.1824f}); + test.AddInput("bias", {2, 2, 2}, {-2.0f, 0.3f, 0.0f, 0.0f, -2.0f, 0.3f, 0.0f, 0.0f}); + test.AddOutput("output", dims, {-1.9f, 2.0f, -0.6953f, -5.1824f, -1.9f, 2.0f, -0.6953f, -5.1824f}); + + test.SetOutputTolerance(0.0001f); + + RunTestOnCpuAndCuda(test); +} + +TEST(LayerNormTest, LayerNorm_Scale_Bias_NoBroadcast_Fp16) { + OpTester test("LayerNormalization"); + test.AddAttribute("epsilon", 1e-05f); + + std::vector dims{2, 2, 2}; + test.AddInput("x", dims, ToFloat16({-1.0f, 2.0f, 3.0f, -4.0f, -10.264f, 8.6453f, 43.1561f, -0.641239f})); + test.AddInput("gamma", {2, 2, 2}, ToFloat16({-0.1f, 1.7f, -0.6953f, 5.1824f, -0.1f, 1.7f, -0.6953f, 5.1824f})); + test.AddInput("bias", {2, 2, 2}, ToFloat16({-2.0f, 0.3f, 0.0f, 0.0f, -2.0f, 0.3f, 0.0f, 0.0f})); + test.AddOutput("output", dims, ToFloat16({-1.9f, 2.0f, -0.6953f, -5.1824f, -1.9f, 2.0f, -0.6953f, -5.1824f})); + + RunTestOnCpuAndCuda(test); +} + +TEST(LayerNormTest, LayerNorm_Scale_Bias_Broadcast_Dim0) { + OpTester test("LayerNormalization"); + test.AddAttribute("epsilon", 1e-05f); + + std::vector dims{4, 2, 2}; + test.AddInput("x", dims, {-1.0f, 2.0f, -10.264f, 8.6453f, 3.0f, -4.0f, 43.1561f, -0.641239f, -5.0f, 6.0f, -8.2164f, 0.11412f, 7.0f, 8.0f, 41.3156f, 3.0458f}); + test.AddInput("gamma", {1, 2, 2}, {-0.1f, 1.7f, -0.6953f, 5.1824f}); + test.AddInput("bias", {1, 2, 2}, {-2.0f, 0.3f, 0.0f, 0.0f}); + test.AddOutput("output", dims, {-1.9f, 2.0f, 0.6953f, 5.1824f, -2.1f, -1.4f, -0.6953f, -5.1824f, -1.9f, 2.0f, 0.6953f, 5.1824f, -1.9f, 2.0f, -0.6953f, -5.1824f}); + test.SetOutputTolerance(0.0001f); + + RunTestOnCpuAndCuda(test); +} + +TEST(LayerNormTest, LayerNorm_Scale_Bias_Broadcast_Dim0_Fp16) { + OpTester test("LayerNormalization"); + test.AddAttribute("epsilon", 1e-05f); + + std::vector dims{4, 2, 2}; + test.AddInput("x", dims, ToFloat16({-1.0f, 2.0f, -10.264f, 8.6453f, 3.0f, -4.0f, 43.1561f, -0.641239f, -5.0f, 6.0f, -8.2164f, 0.11412f, 7.0f, 8.0f, 41.3156f, 3.0458f})); + test.AddInput("gamma", {1, 2, 2}, 
ToFloat16({-0.1f, 1.7f, -0.6953f, 5.1824f})); + test.AddInput("bias", {1, 2, 2}, ToFloat16({-2.0f, 0.3f, 0.0f, 0.0f})); + test.AddOutput("output", dims, ToFloat16({-1.9f, 2.0f, 0.6953f, 5.1824f, -2.1f, -1.4f, -0.6953f, -5.1824f, -1.9f, 2.0f, 0.6953f, 5.1824f, -1.9f, 2.0f, -0.6953f, -5.1824f})); + + RunTestOnCpuAndCuda(test); +} + +TEST(LayerNormTest, LayerNorm_Scale_Bias_Broadcast_Dim1) { + OpTester test("LayerNormalization"); + test.AddAttribute("epsilon", 1e-05f); + + std::vector dims{2, 4, 2}; + test.AddInput("x", dims, {-1.0f, 2.0f, 3.0f, -4.0f, -5.0f, 6.0f, 7.0f, 8.0f, -10.264f, 8.6453f, 43.1561f, -0.641239f, -8.2164f, 0.11412f, 41.3156f, 3.0458f}); + test.AddInput("gamma", {2, 1, 2}, {-0.1f, 1.7f, -0.6953f, 5.1824f}); + test.AddInput("bias", {2, 1, 2}, {-2.0f, 0.3f, 0.0f, 0.0f}); + test.AddOutput("output", dims, {-1.9f, 2.0f, -2.1f, -1.4f, -1.9f, 2.0f, -1.9f, 2.0f, 0.6953f, 5.1824f, -0.6953f, -5.1824f, 0.6953f, 5.1824f, -0.6953f, -5.1824f}); + test.SetOutputTolerance(0.0001f); + + RunTestOnCpuAndCuda(test); +} + +TEST(LayerNormTest, LayerNorm_Scale_Bias_Broadcast_Dim1_Fp16) { + OpTester test("LayerNormalization"); + test.AddAttribute("epsilon", 1e-05f); + + std::vector dims{2, 4, 2}; + test.AddInput("x", dims, ToFloat16({-1.0f, 2.0f, 3.0f, -4.0f, -5.0f, 6.0f, 7.0f, 8.0f, -10.264f, 8.6453f, 43.1561f, -0.641239f, -8.2164f, 0.11412f, 41.3156f, 3.0458f})); + test.AddInput("gamma", {2, 1, 2}, ToFloat16({-0.1f, 1.7f, -0.6953f, 5.1824f})); + test.AddInput("bias", {2, 1, 2}, ToFloat16({-2.0f, 0.3f, 0.0f, 0.0f})); + test.AddOutput("output", dims, ToFloat16({-1.9f, 2.0f, -2.1f, -1.4f, -1.9f, 2.0f, -1.9f, 2.0f, 0.6953f, 5.1824f, -0.6953f, -5.1824f, 0.6953f, 5.1824f, -0.6953f, -5.1824f})); + + RunTestOnCpuAndCuda(test); +} + +TEST(LayerNormTest, LayerNorm_Scale_Bias_Broadcast_Fp16) { + auto run_test = [](bool is_initializer) { + OpTester test("LayerNormalization"); + test.AddAttribute("epsilon", 1e-05f); + + std::vector dims{1, 3, 2}; + test.AddInput("x", dims, ToFloat16({1.2416f, 0.946123f, 13.1685f, 0.36423f, 21.145f, 0.03941f})); + test.AddInput("gamma", {1, 1, 2}, ToFloat16({-0.6953f, 5.1824f}), is_initializer); + test.AddInput("bias", {1, 1, 2}, ToFloat16({0.6435f, -0.3964f}), is_initializer); + test.AddOutput("output", dims, ToFloat16({-0.0516f, -5.5776f, -0.0518f, -5.5788f, -0.0518f, -5.5788f})); + + RunTestOnCpuAndCuda(test); + }; + + run_test(false); + run_test(true); +} + TEST(LayerNormTest, LayerNorm_Scale_Bias_Float16InputScaleBiasOutput) { auto run_test = [](bool is_initializer) { OpTester test("LayerNormalization"); @@ -300,6 +428,21 @@ TEST(LayerNormTest, LayerNorm17_double) { test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kDnnlExecutionProvider}); } +// Test normalize size shall be larger than 1. 
+TEST(LayerNormTest, LayerNorm_InvalidNormSize) { + OpTester test("LayerNormalization"); + test.AddAttribute("epsilon", 1e-05f); + + std::vector dims{1, 3, 1}; + test.AddInput("x", dims, {1.2416f, 0.946123f, 13.1685f}); + test.AddInput("gamma", {1}, {-0.6953f}); + test.AddInput("bias", {1}, {0.6435f}); + test.AddAttribute("axis", 2); + test.AddOutput("output", dims, {-0.0516f, -5.5776f, -0.0518f}); + + RunTestOnCpuAndCuda(test, kLayerNormInvalidSize); +} + TEST(LayerNormTest, LayerNorm_InvalidScaleBias) { OpTester test("LayerNormalization"); test.AddAttribute("epsilon", 1e-05f); @@ -311,11 +454,10 @@ TEST(LayerNormTest, LayerNorm_InvalidScaleBias) { test.AddInput("bias", {2}, {0.6435f, -0.3964f}); test.AddAttribute("axis", 1); test.AddOutput("output", dims, {-0.0516f, -5.5776f, -0.0518f, -5.5788f, -0.0518f, -5.5788f}); + // CPU and CUDA EPs have check for unexpected scale or bias sizes. Exclude other EPs with a LayerNormalization // implementation for which we don't control the check or error message. - test.Run(OpTester::ExpectResult::kExpectFailure, - "Size of X.shape()[axis:] == 6. Size of scale and bias (if provided) must match this", - {kDnnlExecutionProvider, kDmlExecutionProvider, kTensorrtExecutionProvider}); + RunTestOnCpuAndCuda(test, kLayerNormInputShapeMismatchError); } #if defined(USE_DNNL) diff --git a/onnxruntime/test/contrib_ops/matmul_4bits_test.cc b/onnxruntime/test/contrib_ops/matmul_4bits_test.cc index eebe9197573c6..9bf08c6350833 100644 --- a/onnxruntime/test/contrib_ops/matmul_4bits_test.cc +++ b/onnxruntime/test/contrib_ops/matmul_4bits_test.cc @@ -546,8 +546,8 @@ TEST(MatMulNBits, Float16Large) { // of elements in this test, ULPs should probably be used instead of absolute/relative tolerances. float abs_error = 0.3f; #elif USE_WEBGPU - // See Intel A770 to pass these tests with an absolute error of 0.08. - float abs_error = 0.08f; + // Use absolute error of 0.1 for WebGPU with subgroup implementation + float abs_error = 0.1f; #else float abs_error = 0.05f; #endif diff --git a/onnxruntime/test/contrib_ops/multihead_attention_op_test.cc b/onnxruntime/test/contrib_ops/multihead_attention_op_test.cc index 6b6799d73fb56..ffdf69cc149b3 100644 --- a/onnxruntime/test/contrib_ops/multihead_attention_op_test.cc +++ b/onnxruntime/test/contrib_ops/multihead_attention_op_test.cc @@ -524,6 +524,7 @@ static void RunMultiHeadAttentionTests(AttentionTestData& data, // Test fused cross attention kernel // It requires head_size > 32 and head_size <= 64 for T4 GPU; hidden_size == v_hidden_size. 
TEST(MultiHeadAttentionTest, CrossAttention_Batch2_HeadSize40) { + ROCM_GTEST_SKIP("ROCm MHA skip - missing support for ROCm on Radeon"); AttentionTestData data; GetCrossAttentionData_HeadSize40(data); RunMultiHeadAttentionTests(data); @@ -543,6 +544,7 @@ TEST(MultiHeadAttentionTest, CrossAttention_Batch2_HeadSize32_RightSidePadding_M } TEST(MultiHeadAttentionTest, CrossAttention_Batch2_HeadSize32_RightSidePadding_Mask2D) { + ROCM_GTEST_SKIP("ROCm MHA skip - missing support for ROCm on Radeon"); AttentionTestData data; GetCrossAttentionData_Batch2_HeadSize32_RightSidePadding(data, false); RunMultiHeadAttentionTests(data, DISABLE_CPU | DISABLE_WEBGPU); @@ -552,6 +554,7 @@ TEST(MultiHeadAttentionTest, CrossAttention_Batch2_HeadSize32_RightSidePadding_M } TEST(MultiHeadAttentionTest, CrossAttention_Batch1_HeadSize32_LeftSidePadding_Mask2D) { + ROCM_GTEST_SKIP("ROCm MHA skip - missing support for ROCm on Radeon"); AttentionTestData data; GetCrossAttentionData_Batch1_HeadSize32_LeftSidePadding(data); RunMultiHeadAttentionTests(data, DISABLE_CPU | DISABLE_WEBGPU); @@ -561,12 +564,14 @@ TEST(MultiHeadAttentionTest, CrossAttention_Batch1_HeadSize32_LeftSidePadding_Ma } TEST(MultiHeadAttentionTest, CrossAttention_Batch2_HeadSize32_NoBias_NoMask_PackedKV) { + ROCM_GTEST_SKIP("ROCm MHA skip - missing support for ROCm on Radeon"); AttentionTestData data; GetCrossAttentionData_Batch2_HeadSize32_NoBias_NoMask_PackedKV(data); RunMultiHeadAttentionTests(data, DISABLE_WEBGPU); } TEST(MultiHeadAttentionTest, SelfAttention_Batch2_HeadSize32_NoBias_NoMask_PackedQKV) { + ROCM_GTEST_SKIP("ROCm MHA skip - missing support for ROCm on Radeon"); AttentionTestData data; GetSelfAttentionData_Batch2_HeadSize32_NoBias_NoMask_PackedQKV(data); RunMultiHeadAttentionTests(data, DISABLE_WEBGPU); @@ -574,6 +579,7 @@ TEST(MultiHeadAttentionTest, SelfAttention_Batch2_HeadSize32_NoBias_NoMask_Packe // This tests qk_head_size != v_head_size TEST(MultiHeadAttentionTest, CrossAttention_Batch2_HeadSize16_8) { + ROCM_GTEST_SKIP("ROCm MHA skip - missing support for ROCm on Radeon"); AttentionTestData data; GetCrossAttentionData_HeadSize16_8(data); RunMultiHeadAttentionTests(data); @@ -583,6 +589,7 @@ TEST(MultiHeadAttentionTest, CrossAttention_Batch2_HeadSize16_8) { } TEST(MultiHeadAttentionTest, CrossAttention_Batch1_HeadSize16) { + ROCM_GTEST_SKIP("ROCm MHA skip - missing support for ROCm on Radeon"); AttentionTestData data; GetCrossAttentionData_HeadSize16(data); RunMultiHeadAttentionTests(data); @@ -615,7 +622,8 @@ TEST(MultiHeadAttentionTest, SelfAttention_WithPast_WithAttnBias_ForT5) { RunMultiHeadAttentionTests(data, DISABLE_CPU); } -TEST(MultiHeadAttentionTest, AttentionCutlassAttnBias) { +TEST(MultiHeadAttentionTest, AttentionCutlassRelPosBias) { + ROCM_GTEST_SKIP("ROCm MHA skip - missing support for ROCm on Radeon"); // ROCM_GTEST_SKIP("ROCm does not support cutlass"); AttentionTestData data; GetAttentionDataCutlassAttnBias(data); @@ -623,6 +631,7 @@ TEST(MultiHeadAttentionTest, AttentionCutlassAttnBias) { } TEST(MultiHeadAttentionTest, CrossAttention_DiffSequenceLengths) { + ROCM_GTEST_SKIP("ROCm MHA skip - missing support for ROCm on Radeon"); // Whisper decoder cross attention without mask and different sequence lengths for Q and K/V AttentionTestData data; GetCrossAttentionData_DiffSequenceLengths(data); @@ -635,7 +644,8 @@ TEST(MultiHeadAttentionTest, CrossAttention_DiffSequenceLengths) { RunMultiHeadAttentionTests(data, DISABLE_CUDA | DISABLE_WEBGPU); } -TEST(MultiHeadAttentionTest, 
SelfAttention_WithPastAndPresent_NoMask_NoAttnBias) { +TEST(MultiHeadAttentionTest, SelfAttention_WithPastAndPresent_NoMask_NoRelPosBias) { + ROCM_GTEST_SKIP("ROCm MHA skip - missing support for ROCm on Radeon"); // Whisper decoder self attention with past_kv and present_kv AttentionTestData data; GetSelfAttentionData_WithPastAndPresent_NoMask_NoAttnBias(data); diff --git a/onnxruntime/test/contrib_ops/multihead_attention_op_test_data_gen.py b/onnxruntime/test/contrib_ops/multihead_attention_op_test_data_gen.py index bdb0ffc6c50db..52ce2ef5fdef1 100644 --- a/onnxruntime/test/contrib_ops/multihead_attention_op_test_data_gen.py +++ b/onnxruntime/test/contrib_ops/multihead_attention_op_test_data_gen.py @@ -7,7 +7,6 @@ # CUBLAS_WORKSPACE_CONFIG=:4096:8 python multihead_attention_op_test_data_gen.py import math -from typing import Optional, Tuple import numpy as np import torch @@ -56,12 +55,12 @@ def get_extended_attention_mask(self, attention_mask: Tensor, dtype: torch.dtype def forward( self, hidden_states: torch.Tensor, - attention_mask: Optional[torch.FloatTensor] = None, - encoder_hidden_states: Optional[torch.FloatTensor] = None, - encoder_attention_mask: Optional[torch.FloatTensor] = None, - past_key_value: Optional[Tuple[Tuple[torch.FloatTensor]]] = None, - output_attentions: Optional[bool] = False, - ) -> Tuple[torch.Tensor]: + attention_mask: torch.FloatTensor | None = None, + encoder_hidden_states: torch.FloatTensor | None = None, + encoder_attention_mask: torch.FloatTensor | None = None, + past_key_value: tuple[tuple[torch.FloatTensor]] | None = None, + output_attentions: bool | None = False, + ) -> tuple[torch.Tensor]: mixed_query_layer = self.query(hidden_states) if self.verbose: print("q", mixed_query_layer) diff --git a/onnxruntime/test/fuzzing/include/OnnxPrediction.h b/onnxruntime/test/fuzzing/include/OnnxPrediction.h index c169aaa16fd6e..c99120dc45479 100644 --- a/onnxruntime/test/fuzzing/include/OnnxPrediction.h +++ b/onnxruntime/test/fuzzing/include/OnnxPrediction.h @@ -20,7 +20,7 @@ #include #include "BetaDistribution.h" -#include "onnx/onnx_pb.h" +#include "core/graph/onnx_protobuf.h" #include "onnxruntime_cxx_api.h" #include "testlog.h" diff --git a/onnxruntime/test/fuzzing/ort_libfuzzer/OrtProtoLibfuzzer.cpp b/onnxruntime/test/fuzzing/ort_libfuzzer/OrtProtoLibfuzzer.cpp index 607d9cfd9c755..472122be58e89 100644 --- a/onnxruntime/test/fuzzing/ort_libfuzzer/OrtProtoLibfuzzer.cpp +++ b/onnxruntime/test/fuzzing/ort_libfuzzer/OrtProtoLibfuzzer.cpp @@ -5,7 +5,7 @@ #include "OnnxPrediction.h" #include "onnxruntime_session_options_config_keys.h" #include "src/libfuzzer/libfuzzer_macro.h" -#include "onnx/onnx_pb.h" +#include "core/graph/onnx_protobuf.h" #include diff --git a/onnxruntime/test/fuzzing/src/test.cpp b/onnxruntime/test/fuzzing/src/test.cpp index 0755b8493f2a9..c9786fa48dae3 100644 --- a/onnxruntime/test/fuzzing/src/test.cpp +++ b/onnxruntime/test/fuzzing/src/test.cpp @@ -282,7 +282,7 @@ int main(int argc, char* argv[]) { // Enable telemetry events // env.EnableTelemetryEvents(); - struct RunStats run_stats {}; + struct RunStats run_stats{}; runtimeOpt opt{}; user_options& user_opt{opt.user_opt}; Logger::wcstream& werr_stream_buf{opt.werr_stream_buf}; diff --git a/onnxruntime/test/mlas/bench/bench_hgemm.cpp b/onnxruntime/test/mlas/bench/bench_hgemm.cpp new file mode 100644 index 0000000000000..1e8b0eb7c34d6 --- /dev/null +++ b/onnxruntime/test/mlas/bench/bench_hgemm.cpp @@ -0,0 +1,86 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. 
+// Licensed under the MIT License. + +#include "mlas.h" +#include "bench_util.h" +#include "core/util/thread_utils.h" + +#include +#include + +static const std::vector<std::string> hgemm_bench_arg_names = {"M", "N", "K"}; + +void HGEMM(benchmark::State& state, bool transA, bool transB) { + if (state.range(0) <= 0) throw std::invalid_argument("M must be greater than 0!"); + if (state.range(1) <= 0) throw std::invalid_argument("N must be greater than 0!"); + if (state.range(2) <= 0) throw std::invalid_argument("K must be greater than 0!"); + const size_t M = static_cast<size_t>(state.range(0)); + const size_t N = static_cast<size_t>(state.range(1)); + const size_t K = static_cast<size_t>(state.range(2)); + + auto A = RandomVectorUniform(static_cast<size_t>(M * K), MLAS_FP16(-1.0f), MLAS_FP16(1.0f)); + auto B = RandomVectorUniform(static_cast<size_t>(N * K), MLAS_FP16(-1.0f), MLAS_FP16(1.0f)); + std::vector<MLAS_FP16> C(static_cast<size_t>(M * N)); + + MLAS_FP16 alpha = MLAS_FP16(1.0f); + MLAS_FP16 beta = MLAS_FP16(0.0f); + OrtThreadPoolParams tpo; + tpo.thread_pool_size = 8; + tpo.auto_set_affinity = true; + std::unique_ptr<onnxruntime::concurrency::ThreadPool> tp( + onnxruntime::concurrency::CreateThreadPool(&onnxruntime::Env::Default(), + tpo, onnxruntime::concurrency::ThreadPoolType::INTRA_OP)); + MlasGemm( + transA ? CblasTrans : CblasNoTrans, + transB ? CblasTrans : CblasNoTrans, + static_cast<size_t>(M), + static_cast<size_t>(N), + static_cast<size_t>(K), + A.data(), + transA ? M : K, + B.data(), + transB ? K : N, + C.data(), + N, + alpha.val, + beta.val, + tp.get()); + + for (auto _ : state) { + MlasGemm( + transA ? CblasTrans : CblasNoTrans, + transB ? CblasTrans : CblasNoTrans, + static_cast<size_t>(M), + static_cast<size_t>(N), + static_cast<size_t>(K), + A.data(), + transA ? M : K, + B.data(), + transB ? K : N, + C.data(), + N, + alpha.val, + beta.val, + tp.get()); + } +} + +static void GemmSizeWithOne(benchmark::internal::Benchmark* b) { + b->ArgNames(hgemm_bench_arg_names); + b->ArgsProduct({{1}, {63, 255, 1023}, {63, 255, 1023}}); + b->ArgsProduct({{63, 255, 1023}, {1}, {63, 255, 1023}}); + b->ArgsProduct({{63, 255, 1023}, {63, 255, 1023}, {1}}); +} +BENCHMARK_CAPTURE(HGEMM, GEMV_TransB, false, true)->Apply(GemmSizeWithOne)->UseRealTime(); + +static void GemmSizeProducts(benchmark::internal::Benchmark* b) { + b->ArgNames(hgemm_bench_arg_names); + b->ArgsProduct({{63, 255, 1023}, {63, 255, 1023}, {63, 255, 1023}}); +} +BENCHMARK_CAPTURE(HGEMM, NORMAL_TransB, false, true)->Apply(GemmSizeProducts)->UseRealTime(); + +static void GemmLLMSizeProducts(benchmark::internal::Benchmark* b) { + b->ArgNames(hgemm_bench_arg_names); + b->ArgsProduct({{1, 1024, 2048}, {4096, 11008}, {4096, 11008}}); +} +BENCHMARK_CAPTURE(HGEMM, LLM, false, true)->Apply(GemmLLMSizeProducts)->UseRealTime(); diff --git a/onnxruntime/test/mlas/unittest/test_hgemm_neon.cpp b/onnxruntime/test/mlas/unittest/test_hgemm_neon.cpp new file mode 100644 index 0000000000000..4f3d690b432bf --- /dev/null +++ b/onnxruntime/test/mlas/unittest/test_hgemm_neon.cpp @@ -0,0 +1,393 @@ +/*++ + +Copyright (c) Microsoft Corporation. All rights reserved. + +Licensed under the MIT License. + +Module Name: + + test_hgemm_neon.cpp + +Abstract: + + Tests for MLAS fp16 GEMM on ARM CPU. 
+ +--*/ + +#include +#include + +#include "test/mlas/unittest/test_util.h" +#include "core/mlas/lib/mlasi.h" +#include "core/mlas/lib/halfgemm.h" + +#if defined(MLAS_F16VEC_INTRINSICS_SUPPORTED) && defined(MLAS_TARGET_ARM64) + +class MlasNeonHGemmPackBTest : public MlasTestBase { + private: + std::random_device rd_; + unsigned int seed_; + std::mt19937 gen_; // mersenne_twister_engine seeded with rd() + std::uniform_real_distribution distrib_; + MatrixGuardBuffer input_, ref_, packed_; + + template + MLAS_FORCEINLINE void PackB(const MLAS_FP16* src, MLAS_FP16* dst) { + size_t i = 0; + for (; i + 16 <= N; i += 16) { + for (size_t j = 0; j < K; ++j) { + for (size_t k = 0; k < 16; ++k) { + *dst = src[(i + k) * K + j]; + ++dst; + } + } + } + if (i + 8 <= N) { + for (size_t j = 0; j < K; ++j) { + for (size_t k = 0; k < 8; ++k) { + *dst = src[(i + k) * K + j]; + ++dst; + } + } + i += 8; + } + if (i < N) { + for (size_t j = 0; j < K; ++j) { + for (size_t k = 0; k < N - i; ++k) { + *dst = src[(i + k) * K + j]; + ++dst; + } + dst += 8 - (N - i); + } + } + } + + template + MLAS_FORCEINLINE void Check(const MLAS_FP16* packed, const MLAS_FP16* ref) { + size_t n = ((N + 7) & ~7) * K; + for (size_t i = 0; i < n; ++i) { + ASSERT_EQ(packed[i].val, ref[i].val) << " seed " << seed_ << " i " << i; + } + } + + template + void TestPackB() { + auto InitializeBuffer = [this](MLAS_FP16* buffer, size_t count) { + for (size_t i = 0; i < count; i++) { + buffer[i] = MLAS_FP16(distrib_(gen_)); + } + }; + + const auto* input = input_.GetFilledBuffer(N * K, InitializeBuffer); + auto* packed = packed_.GetBuffer(K * ((N + 7) & ~7), true); + auto* ref = ref_.GetBuffer(K * ((N + 7) & ~7), true); + hgemm_neon::HPackB_TransposedB_Kernel(input, packed, N, K, K); + PackB(input, ref); + Check(packed, ref); + } + + public: + MlasNeonHGemmPackBTest() + : seed_(rd_()), gen_(seed_), distrib_(-100.f, 100.f) { + } + + static const char* GetTestSuiteName() { + return "NeonHGemmPackB"; + } + + void ExecuteShort(void) override { + TestPackB<1, 1>(); + TestPackB<1, 15>(); + TestPackB<1, 31>(); + TestPackB<8, 1>(); + TestPackB<8, 16>(); + TestPackB<9, 31>(); + TestPackB<9, 33>(); + TestPackB<15, 33>(); + TestPackB<17, 67>(); + TestPackB<17, 96>(); + TestPackB<265, 263>(); + } +}; + +class MlasNeonHGemmTransposedBTest : public MlasTestBase { + private: + std::random_device rd_; + unsigned int seed_; + std::mt19937 gen_; // mersenne_twister_engine seeded with rd() + std::uniform_real_distribution distrib_; + MatrixGuardBuffer A_, B_, ref_, C_; + + template + MLAS_FORCEINLINE void HGemm(const MLAS_FP16* A, const MLAS_FP16* B, MLAS_FP16* C, MLAS_FP16 alpha, MLAS_FP16 beta) { + float alphaf = alpha.ToFloat(); + float betaf = beta.ToFloat(); + for (size_t m = 0; m < M; ++m) { + for (size_t n = 0; n < N; ++n) { + float accu = 0.0f; + for (size_t k = 0; k < K; ++k) { + accu += (A[m * K + k].ToFloat()) * (B[n * K + k].ToFloat()); + } + C[m * N + n] = MLAS_FP16(accu * alphaf + C[m * N + n].ToFloat() * betaf); + } + } + } + + MLAS_FORCEINLINE + bool FloatEqual(MLAS_FP16 v0, MLAS_FP16 v1, float rtol, float atol) { + float f0 = v0.ToFloat(), f1 = v1.ToFloat(); + return std::abs(f0 - f1) <= std::abs(f1 * rtol) + atol; + } + + template + MLAS_FORCEINLINE void Check(const MLAS_FP16* C, const MLAS_FP16* ref) { + size_t n = M * N; + for (size_t i = 0; i < n; ++i) { + ASSERT_TRUE(FloatEqual(C[i], ref[i], 0.02f, 0.055f)) + << " seed " << seed_ << " i " << i + << " M " << M << " N " << N << " K " << K + << " v0 " << C[i] << " v1 " << ref[i]; + } + } + + 
template + void TestHGemm(MLAS_FP16 alpha, MLAS_FP16 beta) { + auto InitializeBuffer = [this](MLAS_FP16* buffer, size_t count) { + for (size_t i = 0; i < count; i++) { + buffer[i] = MLAS_FP16(distrib_(gen_)); + } + }; + + const auto* A = A_.GetFilledBuffer(M * K, InitializeBuffer); + const auto* B = B_.GetFilledBuffer(K * N, InitializeBuffer); + auto* C = C_.GetBuffer(M * N, true); + auto* ref = ref_.GetBuffer(M * N, true); + hgemm_neon::HGemm_TransposedB_Kernel(A, B, C, M, N, K, K, K, N, alpha.val, beta.val); + HGemm(A, B, ref, alpha, beta); + Check(C, ref); + } + + public: + MlasNeonHGemmTransposedBTest() + : seed_(1928375), gen_(seed_), distrib_(-1.f, 1.f) { + } + + static const char* GetTestSuiteName() { + return "NeonHGemmTransposedB"; + } + + void ExecuteShort(void) override { + TestHGemm<2, 1, 1>(MLAS_FP16(1.0f), MLAS_FP16(0.0f)); + TestHGemm<1, 1, 1>(MLAS_FP16(0.5f), MLAS_FP16(1.0f)); + TestHGemm<2, 1, 1>(MLAS_FP16(1.5f), MLAS_FP16(0.5f)); + TestHGemm<1, 15, 17>(MLAS_FP16(1.0f), MLAS_FP16(0.0f)); + TestHGemm<2, 17, 15>(MLAS_FP16(0.5f), MLAS_FP16(1.0f)); + TestHGemm<1, 17, 15>(MLAS_FP16(1.5f), MLAS_FP16(0.5f)); + TestHGemm<1, 33, 31>(MLAS_FP16(1.0f), MLAS_FP16(0.0f)); + TestHGemm<2, 31, 32>(MLAS_FP16(0.5f), MLAS_FP16(1.0f)); + TestHGemm<1, 32, 33>(MLAS_FP16(1.5f), MLAS_FP16(0.5f)); + TestHGemm<1, 78, 263>(MLAS_FP16(0.5f), MLAS_FP16(0.0f)); + TestHGemm<2, 267, 79>(MLAS_FP16(1.5f), MLAS_FP16(1.0f)); + } +}; + +class MlasNeonHGemmTransposedPackedBTest : public MlasTestBase { + private: + std::random_device rd_; + unsigned int seed_; + std::mt19937 gen_; // mersenne_twister_engine seeded with rd() + std::uniform_real_distribution distrib_; + MatrixGuardBuffer A_, B_, ref_, C_; + + template + MLAS_FORCEINLINE void HGemm(const MLAS_FP16* A, const MLAS_FP16* B, MLAS_FP16* C, MLAS_FP16 alpha, MLAS_FP16 beta) { + float alphaf = alpha.ToFloat(); + float betaf = beta.ToFloat(); + size_t n = 0; + for (; n + 16 <= N; n += 16) { + for (size_t i = 0; i < 16; ++i) { + for (size_t m = 0; m < M; ++m) { + float accu = 0.0f; + for (size_t k = 0; k < K; ++k) { + accu += (A[m * K + k].ToFloat()) * (B[n * K + k * 16 + i].ToFloat()); + } + C[m * N + n + i] = MLAS_FP16(accu * alphaf + C[m * N + n + i].ToFloat() * betaf); + } + } + } + if (n + 8 <= N) { + for (size_t i = 0; i < 8; ++i) { + for (size_t m = 0; m < M; ++m) { + float accu = 0.0f; + for (size_t k = 0; k < K; ++k) { + accu += (A[m * K + k].ToFloat()) * (B[n * K + k * 8 + i].ToFloat()); + } + C[m * N + n + i] = MLAS_FP16(accu * alphaf + C[m * N + n + i].ToFloat() * betaf); + } + } + n += 8; + } + if (n < N) { + for (size_t i = 0; i < N - n; ++i) { + for (size_t m = 0; m < M; ++m) { + float accu = 0.0f; + for (size_t k = 0; k < K; ++k) { + accu += (A[m * K + k].ToFloat()) * (B[n * K + k * 8 + i].ToFloat()); + } + C[m * N + n + i] = MLAS_FP16(accu * alphaf + C[m * N + n + i].ToFloat() * betaf); + } + } + } + } + + MLAS_FORCEINLINE + bool FloatEqual(MLAS_FP16 v0, MLAS_FP16 v1, float rtol, float atol) { + float f0 = v0.ToFloat(), f1 = v1.ToFloat(); + return std::abs(f0 - f1) <= std::abs(f1 * rtol) + atol; + } + + template + MLAS_FORCEINLINE void Check(const MLAS_FP16* C, const MLAS_FP16* ref) { + size_t n = M * N; + for (size_t i = 0; i < n; ++i) { + ASSERT_TRUE(FloatEqual(C[i], ref[i], 0.02f, 0.055f)) + << " seed " << seed_ << " i " << i + << " M " << M << " K " << K << " N " << N + << " v0 " << C[i] << " v1 " << ref[i]; + } + } + + template + void TestHGemm(MLAS_FP16 alpha, MLAS_FP16 beta) { + auto InitializeBuffer = [this](MLAS_FP16* buffer, size_t 
count) { + for (size_t i = 0; i < count; i++) { + buffer[i] = MLAS_FP16(distrib_(gen_)); + } + }; + + const auto* A = A_.GetFilledBuffer(M * K, InitializeBuffer); + const auto* B = B_.GetFilledBuffer(K * ((N + 7) & ~7), InitializeBuffer); + auto* C = C_.GetBuffer(M * N, true); + auto* ref = ref_.GetBuffer(M * N, true); + hgemm_neon::HGemm_TransposedPackedB_Kernel(A, B, C, M, N, K, K, N, alpha.val, beta.val); + HGemm(A, B, ref, alpha, beta); + Check(C, ref); + } + + public: + MlasNeonHGemmTransposedPackedBTest() + : seed_(1928372), gen_(seed_), distrib_(-1.f, 1.f) { + } + + static const char* GetTestSuiteName() { + return "NeonHGemmTransposedPackedB"; + } + + void ExecuteShort(void) override { + TestHGemm<2, 1, 1>(MLAS_FP16(1.0f), MLAS_FP16(0.0f)); + TestHGemm<1, 1, 1>(MLAS_FP16(0.5f), MLAS_FP16(1.0f)); + TestHGemm<2, 1, 1>(MLAS_FP16(1.5f), MLAS_FP16(0.5f)); + TestHGemm<1, 15, 17>(MLAS_FP16(1.0f), MLAS_FP16(0.0f)); + TestHGemm<2, 17, 15>(MLAS_FP16(0.5f), MLAS_FP16(1.0f)); + TestHGemm<1, 17, 15>(MLAS_FP16(1.5f), MLAS_FP16(0.5f)); + TestHGemm<1, 33, 31>(MLAS_FP16(1.0f), MLAS_FP16(0.0f)); + TestHGemm<2, 31, 32>(MLAS_FP16(0.5f), MLAS_FP16(1.0f)); + TestHGemm<1, 32, 33>(MLAS_FP16(1.5f), MLAS_FP16(0.5f)); + TestHGemm<1, 78, 263>(MLAS_FP16(0.5f), MLAS_FP16(0.0f)); + TestHGemm<2, 267, 79>(MLAS_FP16(1.5f), MLAS_FP16(1.0f)); + } +}; + +class MlasNeonHGemmTest : public MlasTestBase { + private: + std::random_device rd_; + unsigned int seed_; + std::mt19937 gen_; // mersenne_twister_engine seeded with rd() + std::uniform_real_distribution distrib_; + MatrixGuardBuffer A_, B_, ref_, C_; + + template + MLAS_FORCEINLINE void HGemm(const MLAS_FP16* A, const MLAS_FP16* B, MLAS_FP16* C, MLAS_FP16 alpha, MLAS_FP16 beta) { + float alphaf = alpha.ToFloat(); + float betaf = beta.ToFloat(); + for (size_t i = 0; i < M; ++i) { + for (size_t j = 0; j < N; ++j) { + float accu = 0.0f; + for (size_t k = 0; k < K; ++k) { + accu += (A[i * K + k].ToFloat()) * (B[j * K + k].ToFloat()); + } + C[i * N + j] = MLAS_FP16(accu * alphaf + C[i * N + j].ToFloat() * betaf); + } + } + } + + MLAS_FORCEINLINE + bool FloatEqual(MLAS_FP16 v0, MLAS_FP16 v1, float rtol, float atol) { + float f0 = v0.ToFloat(), f1 = v1.ToFloat(); + return std::abs(f0 - f1) <= std::abs(f1 * rtol) + atol; + } + + template + MLAS_FORCEINLINE void Check(const MLAS_FP16* C, const MLAS_FP16* ref) { + for (size_t i = 0; i < M; ++i) { + for (size_t j = 0; j < N; ++j) { + ASSERT_TRUE(FloatEqual(C[i * N + j], ref[i * N + j], 0.02f, 0.055f)) + << " seed " << seed_ << " i " << i << " j " << j + << " M " << M << " K " << K << " N " << N + << " v0 " << C[i * N + j] << " v1 " << ref[i * N + j]; + } + } + } + + template + void TestHGemm(MLAS_FP16 alpha, MLAS_FP16 beta) { + auto InitializeBuffer = [this](MLAS_FP16* buffer, size_t count) { + for (size_t i = 0; i < count; i++) { + buffer[i] = MLAS_FP16(distrib_(gen_)); + } + }; + + const auto* A = A_.GetFilledBuffer(M * K, InitializeBuffer); + const auto* B = B_.GetFilledBuffer(K * N, InitializeBuffer); + auto* C = C_.GetBuffer(M * N, true); + auto* ref = ref_.GetBuffer(M * N, true); + MlasGemm(CblasNoTrans, CblasTrans, M, N, K, A, K, B, K, C, N, alpha.val, beta.val, nullptr); + HGemm(A, B, ref, alpha, beta); + Check(C, ref); + } + + public: + MlasNeonHGemmTest() + : seed_(192837), gen_(seed_), distrib_(-0.25f, 0.25f) { + } + + static const char* GetTestSuiteName() { + return "NeonHGemm"; + } + + void ExecuteShort(void) override { + TestHGemm<2, 1, 1>(MLAS_FP16(1.0f), MLAS_FP16(0.0f)); + TestHGemm<1, 128, 
512>(MLAS_FP16(0.5f), MLAS_FP16(1.0f)); + TestHGemm<2, 128, 513>(MLAS_FP16(1.5f), MLAS_FP16(0.5f)); + TestHGemm<1, 128, 511>(MLAS_FP16(1.0f), MLAS_FP16(0.0f)); + TestHGemm<2, 129, 512>(MLAS_FP16(0.5f), MLAS_FP16(1.0f)); + TestHGemm<1, 127, 512>(MLAS_FP16(1.5f), MLAS_FP16(0.5f)); + TestHGemm<1, 513, 1023>(MLAS_FP16(0.5f), MLAS_FP16(1.0f)); + TestHGemm<2, 511, 1025>(MLAS_FP16(1.5f), MLAS_FP16(0.5f)); + TestHGemm<127, 513, 1023>(MLAS_FP16(1.0f), MLAS_FP16(0.0f)); + TestHGemm<129, 511, 1025>(MLAS_FP16(0.5f), MLAS_FP16(1.0f)); + } +}; + +static UNUSED_VARIABLE bool added_to_main = AddTestRegister([](bool is_short_execute) { + size_t count = 0; + if (is_short_execute) { + count += MlasDirectShortExecuteTests::RegisterShortExecute(); + count += MlasDirectShortExecuteTests::RegisterShortExecute(); + count += MlasDirectShortExecuteTests::RegisterShortExecute(); + count += MlasDirectShortExecuteTests::RegisterShortExecute(); + } + return count; +}); + +#endif // defined(MLAS_F16VEC_INTRINSICS_SUPPORTED) && defined(MLAS_TARGET_ARM64) diff --git a/onnxruntime/test/onnx/TestCase.cc b/onnxruntime/test/onnx/TestCase.cc index 51653f8c6ddac..d44f098db6c4c 100644 --- a/onnxruntime/test/onnx/TestCase.cc +++ b/onnxruntime/test/onnx/TestCase.cc @@ -961,6 +961,7 @@ std::unique_ptr> GetBrokenTests(const std::string& provider {"reduce_prod_empty_set", "unknown version", {}}, {"reduce_sum_empty_set", "unknown version", {}}, {"reduce_sum_square_empty_set_expanded", "unknown version", {}}, + {"averagepool_3d_dilations_large_count_include_pad_is_1_ceil_mode_is_True", "TODO(titaiwang): enable this in the next ONNX release."}, #ifdef ENABLE_TRAINING_CORE {"adagrad", "not a registered function/op", {}}, // Op not registered. {"adagrad_multiple", "not a registered function/op", {}}, // Op not registered. @@ -1396,10 +1397,10 @@ std::unique_ptr> GetBrokenTests(const std::string& provider broken_tests->insert({"resize_upsample_sizes_nearest", "result differs"}); broken_tests->insert({"resize_upsample_sizes_nearest_axes_2_3", "result differs"}); broken_tests->insert({"resize_upsample_sizes_nearest_axes_3_2", "result differs"}); - broken_tests->insert({"convtranspose_group_2", "group attribute (new of opset(22)) not supported"}); - broken_tests->insert({"convtranspose_group_2_image_3", "group attribute (new of opset(22)) not supported"}); broken_tests->insert({"resize_upsample_sizes_nearest_not_larger", "output=Y:expected 1 (3f800000), got 4 (40800000), diff: 3, tol=0.002 idx=24. 13 of 49 differ. CPU test passed."}); + broken_tests->insert({"convtranspose_group_2", "Segmentation fault (core dumped). CPU test passed."}); + broken_tests->insert({"convtranspose_group_2_image_3", "Segmentation fault (core dumped). 
CPU test passed."}); } #ifdef DISABLE_CONTRIB_OPS diff --git a/onnxruntime/test/onnx/microbenchmark/layer_normalization.cc b/onnxruntime/test/onnx/microbenchmark/layer_normalization.cc index f6158d8cbc12b..0fccc68c59efd 100644 --- a/onnxruntime/test/onnx/microbenchmark/layer_normalization.cc +++ b/onnxruntime/test/onnx/microbenchmark/layer_normalization.cc @@ -114,9 +114,9 @@ static void BM_LayerNormalization(benchmark::State& state) { auto status = layer_norm_impl.ComputeWithoutContext(x_data, x_shape, scale_data, - static_cast(scale_shape.Size()), + scale_shape, bias_data, - static_cast(bias_shape.Size()), + bias_shape, Y_data, mean_data, inv_std_dev_data, diff --git a/onnxruntime/test/optimizer/graph_transform_test_builder.h b/onnxruntime/test/optimizer/graph_transform_test_builder.h index f641c597acf07..88ad49329f929 100644 --- a/onnxruntime/test/optimizer/graph_transform_test_builder.h +++ b/onnxruntime/test/optimizer/graph_transform_test_builder.h @@ -82,7 +82,11 @@ class ModelTestBuilder { } template - NodeArg* MakeInput(const std::vector& shape, const std::vector& data) { + NodeArg* MakeInput(const std::vector& shape, const std::vector& data, + AllocatorPtr allocator = nullptr) { + if (!allocator) { + allocator = TestCPUExecutionProvider()->CreatePreferredAllocators()[0]; + } ONNX_NAMESPACE::TypeProto type_proto; type_proto.mutable_tensor_type()->set_elem_type(utils::ToTensorProtoElementType()); @@ -93,7 +97,7 @@ class ModelTestBuilder { } OrtValue input_value; - CreateMLValue(TestCPUExecutionProvider()->CreatePreferredAllocators()[0], + CreateMLValue(allocator, shape, data, &input_value); @@ -104,17 +108,19 @@ class ModelTestBuilder { } template - NodeArg* MakeInput(const std::vector& shape, T min, T max) { - return MakeInput(shape, rand_gen_.Uniform(shape, min, max)); + NodeArg* MakeInput(const std::vector& shape, T min, T max, + AllocatorPtr allocator = nullptr) { + return MakeInput(shape, rand_gen_.Uniform(shape, min, max), allocator); } - NodeArg* MakeInputBool(const std::vector& shape) { + NodeArg* MakeInputBool(const std::vector& shape, + AllocatorPtr allocator = nullptr) { std::vector data_uint8 = rand_gen_.Uniform(shape, 0, 1); std::vector data; for (uint8_t x : data_uint8) { data.push_back(x != 0); } - return MakeInput(shape, data); + return MakeInput(shape, data, allocator); } template diff --git a/onnxruntime/test/perftest/command_args_parser.cc b/onnxruntime/test/perftest/command_args_parser.cc index 0b1b2bae6c972..5031d557ee2f0 100644 --- a/onnxruntime/test/perftest/command_args_parser.cc +++ b/onnxruntime/test/perftest/command_args_parser.cc @@ -101,7 +101,9 @@ namespace perftest { "\t Otherwise, it will be fp32 precision. Works for float32 model for HTP backend. Defaults to '1' (with FP16 precision.). \n" "\t [QNN only] [offload_graph_io_quantization]: Offload graph input quantization and graph output dequantization to another EP (typically CPU EP). \n" "\t Defaults to '0' (QNN EP handles the graph I/O quantization and dequantization). \n" - "\t [QNN only] [enable_htp_spill_fill_buffer]: Enable HTP spill file buffer, used while generating QNN context binary." + "\t [QNN only] [enable_htp_spill_fill_buffer]: Enable HTP spill fill buffer, used while generating QNN context binary.\n" + "\t [QNN only] [enable_htp_shared_memory_allocator]: Enable the QNN HTP shared memory allocator and use it for inputs and outputs. 
Requires libcdsprpc.so/dll to be available.\n" + "\t Defaults to '0' (disabled).\n" "\t [Example] [For QNN EP] -e qnn -i \"backend_path|/folderpath/libQnnCpu.so\" \n" "\n" "\t [TensorRT only] [trt_max_partition_iterations]: Maximum iterations for TensorRT parser to get capability.\n" diff --git a/onnxruntime/test/perftest/ort_test_session.cc b/onnxruntime/test/perftest/ort_test_session.cc index 08c2cff8058c2..a7b5a7c72dff1 100644 --- a/onnxruntime/test/perftest/ort_test_session.cc +++ b/onnxruntime/test/perftest/ort_test_session.cc @@ -202,7 +202,8 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device {"backend_path", "profiling_file_path", "profiling_level", "rpc_control_latency", "vtcm_mb", "soc_model", "device_id", "htp_performance_mode", "qnn_saver_path", "htp_graph_finalization_optimization_mode", "qnn_context_priority", "htp_arch", - "enable_htp_fp16_precision", "offload_graph_io_quantization", "enable_htp_spill_fill_buffer"}); + "enable_htp_fp16_precision", "offload_graph_io_quantization", "enable_htp_spill_fill_buffer", + "enable_htp_shared_memory_allocator"}); for (const auto& provider_option : provider_options) { const std::string& key = provider_option.first; const std::string& value = provider_option.second; @@ -231,7 +232,7 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device } else if (key == "qnn_saver_path") { // no validation } else if (key == "htp_graph_finalization_optimization_mode") { - std::unordered_set supported_htp_graph_final_opt_modes = {"0", "1", "2", "3"}; + std::set supported_htp_graph_final_opt_modes = {"0", "1", "2", "3"}; if (supported_htp_graph_final_opt_modes.find(value) == supported_htp_graph_final_opt_modes.end()) { std::ostringstream str_stream; std::copy(supported_htp_graph_final_opt_modes.begin(), supported_htp_graph_final_opt_modes.end(), @@ -245,7 +246,7 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device ORT_THROW("Supported qnn_context_priority: low, normal, normal_high, high"); } } else if (key == "htp_arch") { - std::unordered_set supported_htp_archs = {"0", "68", "69", "73", "75"}; + std::set supported_htp_archs = {"0", "68", "69", "73", "75"}; if (supported_htp_archs.find(value) == supported_htp_archs.end()) { std::ostringstream str_stream; std::copy(supported_htp_archs.begin(), supported_htp_archs.end(), @@ -253,8 +254,11 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device std::string str = str_stream.str(); ORT_THROW("Wrong value for htp_arch. select from: " + str); } - } else if (key == "enable_htp_fp16_precision" || key == "offload_graph_io_quantization" || key == "enable_htp_spill_fill_buffer") { - std::unordered_set supported_options = {"0", "1"}; + } else if (key == "enable_htp_fp16_precision" || + key == "offload_graph_io_quantization" || + key == "enable_htp_spill_fill_buffer" || + key == "enable_htp_shared_memory_allocator") { + std::set supported_options = {"0", "1"}; if (supported_options.find(value) == supported_options.end()) { std::ostringstream str_stream; std::copy(supported_options.begin(), supported_options.end(), @@ -262,6 +266,11 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device std::string str = str_stream.str(); ORT_THROW("Wrong value for ", key, ". 
select from: ", str); } + + if (key == "enable_htp_shared_memory_allocator" && value == "1") { + // if this option is set, also use the enabled allocator + device_memory_name_ = "QnnHtpShared"; + } } } session_options.AppendExecutionProvider("QNN", provider_options); @@ -505,10 +514,6 @@ select from 'TF8', 'TF16', 'UINT8', 'FLOAT', 'ITENSOR'. \n)"); } else if (provider_name_ == onnxruntime::kMIGraphXExecutionProvider) { #ifdef USE_MIGRAPHX Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_MIGraphX(session_options, 0)); - OrtROCMProviderOptions rocm_options; - rocm_options.miopen_conv_exhaustive_search = performance_test_config.run_config.cudnn_conv_algo; - rocm_options.do_copy_in_default_stream = !performance_test_config.run_config.do_cuda_copy_in_separate_stream; - session_options.AppendExecutionProvider_ROCM(rocm_options); #else ORT_THROW("MIGraphX is not supported in this build\n"); #endif @@ -838,8 +843,8 @@ select from 'TF8', 'TF16', 'UINT8', 'FLOAT', 'ITENSOR'. \n)"); }; } else { Ort::MemoryInfo memory_info = Ort::MemoryInfo(device_memory_name_.data(), OrtArenaAllocator, 0, OrtMemTypeCPUOutput); - custom_allocator_ = std::make_unique(session_, memory_info); - allocator_ = *custom_allocator_; + custom_allocator_ = Ort::Allocator(session_, memory_info); + allocator_ = custom_allocator_; // free dimensions are treated as 1 if not overridden transform_fcn = [](int64_t input) { return (input == -1) ? -input : input; }; diff --git a/onnxruntime/test/perftest/ort_test_session.h b/onnxruntime/test/perftest/ort_test_session.h index 7d5e46983ad41..d6580812da8f0 100644 --- a/onnxruntime/test/perftest/ort_test_session.h +++ b/onnxruntime/test/perftest/ort_test_session.h @@ -39,7 +39,7 @@ class OnnxRuntimeTestSession : public TestSession { std::uniform_int_distribution dist_; std::vector> test_inputs_; OrtAllocator* allocator_ = Ort::AllocatorWithDefaultOptions(); - std::unique_ptr custom_allocator_; + Ort::Allocator custom_allocator_{nullptr}; std::vector outputs_; std::vector output_names_; // The same size with output_names_. diff --git a/onnxruntime/test/platform/apple/apple_package_test/Podfile.template b/onnxruntime/test/platform/apple/apple_package_test/Podfile.template index 5c557cc73fe7f..9abec2242502f 100644 --- a/onnxruntime/test/platform/apple/apple_package_test/Podfile.template +++ b/onnxruntime/test/platform/apple/apple_package_test/Podfile.template @@ -15,7 +15,7 @@ if ENV['SKIP_MACOS_TEST'] != 'true' # Comment the next line if you don't want to use dynamic frameworks use_frameworks! - platform :osx, '11.0' + platform :osx, '13.3' target 'macos_package_testUITests' do inherit! 
:search_paths diff --git a/onnxruntime/test/providers/coreml/coreml_basic_test.cc b/onnxruntime/test/providers/coreml/coreml_basic_test.cc index 302ad57fb88c5..a9aa78b7a3229 100644 --- a/onnxruntime/test/providers/coreml/coreml_basic_test.cc +++ b/onnxruntime/test/providers/coreml/coreml_basic_test.cc @@ -15,7 +15,7 @@ #include "test/util/include/inference_session_wrapper.h" #include "test/util/include/test_environment.h" #include "test/util/include/test_utils.h" -#include "onnx/onnx_pb.h" +#include "core/graph/onnx_protobuf.h" #if !defined(ORT_MINIMAL_BUILD) // if this is a full build we need the provider test utils diff --git a/onnxruntime/test/providers/cpu/math/element_wise_ops_test.cc b/onnxruntime/test/providers/cpu/math/element_wise_ops_test.cc index a74517840097c..5fd83ac1ad61b 100644 --- a/onnxruntime/test/providers/cpu/math/element_wise_ops_test.cc +++ b/onnxruntime/test/providers/cpu/math/element_wise_ops_test.cc @@ -183,6 +183,32 @@ TEST(MathOpTest, DimWithZeroHandling) { run(test5); } +TEST(MathOpTest, Add_int8) { + OpTester test("Add", 14); + test.AddInput("A", {3}, {1, -2, 3}); + test.AddInput("B", {3}, {4, 5, 6}); + test.AddOutput("C", {3}, {5, 3, 9}); + // Only run this test with CPU and Cuda EPs because on TensorRT EP engine creation fails. + if (nullptr != DefaultCpuExecutionProvider().get()) { + std::vector> execution_providers; + execution_providers.push_back(DefaultCpuExecutionProvider()); + test.Run(OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, &execution_providers); + } + if (nullptr != DefaultCudaExecutionProvider().get()) { + std::vector> execution_providers; + execution_providers.push_back(DefaultCudaExecutionProvider()); + test.Run(OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, &execution_providers); + } +} + +TEST(MathOpTest, Add_int16) { + OpTester test("Add", 14); + test.AddInput("A", {3}, {1, -2, 3}); + test.AddInput("B", {3}, {4, 5, 6}); + test.AddOutput("C", {3}, {5, 3, 9}); + test.Run(); +} + TEST(MathOpTest, Add_int32) { OpTester test("Add"); test.AddInput("A", {3}, {1, 2, 3}); @@ -199,6 +225,38 @@ TEST(MathOpTest, Add_int64) { test.Run(); } +TEST(MathOpTest, Add_uint8) { + OpTester test("Add", 14); + test.AddInput("A", {3}, {1, 2, 3}); + test.AddInput("B", {3}, {4, 5, 6}); + test.AddOutput("C", {3}, {5, 7, 9}); + test.Run(); +} + +TEST(MathOpTest, Add_uint16) { + OpTester test("Add", 14); + test.AddInput("A", {3}, {1, 2, 3}); + test.AddInput("B", {3}, {4, 5, 6}); + test.AddOutput("C", {3}, {5, 7, 9}); + test.Run(); +} + +TEST(MathOpTest, Add_uint32) { + OpTester test("Add"); + test.AddInput("A", {3}, {1, 2, 3}); + test.AddInput("B", {3}, {4, 5, 6}); + test.AddOutput("C", {3}, {5, 7, 9}); + test.Run(); +} + +TEST(MathOpTest, Add_uint64) { + OpTester test("Add"); + test.AddInput("A", {3}, {1, 2, 3}); + test.AddInput("B", {3}, {4, 5, 6}); + test.AddOutput("C", {3}, {5, 7, 9}); + test.Run(); +} + TEST(MathOpTest, Add_float) { OpTester test("Add"); std::vector dims{3, 3}; @@ -567,6 +625,32 @@ TEST(MathOpTest, Add_Invalid_Broadcast) { // test.Run(OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, &execution_providers); // } +TEST(MathOpTest, Sub_int8) { + OpTester test("Sub", 14); + test.AddInput("A", {3}, {1, 5, 6}); + test.AddInput("B", {3}, {4, 5, 3}); + test.AddOutput("C", {3}, {-3, 0, 3}); + // Only run this test with CPU and Cuda EPs because on TensorRT EP engine creation fails. 
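+ // (Here and in the other int8 variants: passing an explicit execution_providers vector to
+ // test.Run() below makes OpTester run the model only on those EPs, rather than on every
+ // provider registered in the build.)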
+ if (nullptr != DefaultCpuExecutionProvider().get()) { + std::vector> execution_providers; + execution_providers.push_back(DefaultCpuExecutionProvider()); + test.Run(OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, &execution_providers); + } + if (nullptr != DefaultCudaExecutionProvider().get()) { + std::vector> execution_providers; + execution_providers.push_back(DefaultCudaExecutionProvider()); + test.Run(OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, &execution_providers); + } +} + +TEST(MathOpTest, Sub_int16) { + OpTester test("Sub", 14); + test.AddInput("A", {3}, {1, 5, 6}); + test.AddInput("B", {3}, {4, 5, 3}); + test.AddOutput("C", {3}, {-3, 0, 3}); + test.Run(); +} + TEST(MathOpTest, Sub_int32) { OpTester test("Sub"); test.AddInput("A", {3}, {1, 4, 3}); @@ -583,7 +667,39 @@ TEST(MathOpTest, Sub_int64) { test.Run(); } -TEST(MathOpTest, Sub) { +TEST(MathOpTest, Sub_uint8) { + OpTester test("Sub", 14); + test.AddInput("A", {3}, {4, 5, 6}); + test.AddInput("B", {3}, {1, 5, 3}); + test.AddOutput("C", {3}, {3, 0, 3}); + test.Run(); +} + +TEST(MathOpTest, Sub_uint16) { + OpTester test("Sub", 14); + test.AddInput("A", {3}, {4, 5, 6}); + test.AddInput("B", {3}, {1, 5, 3}); + test.AddOutput("C", {3}, {3, 0, 3}); + test.Run(); +} + +TEST(MathOpTest, Sub_uint32) { + OpTester test("Sub"); + test.AddInput("A", {3}, {4, 5, 6}); + test.AddInput("B", {3}, {1, 5, 3}); + test.AddOutput("C", {3}, {3, 0, 3}); + test.Run(); +} + +TEST(MathOpTest, Sub_uint64) { + OpTester test("Sub"); + test.AddInput("A", {3}, {4, 5, 6}); + test.AddInput("B", {3}, {1, 5, 3}); + test.AddOutput("C", {3}, {3, 0, 3}); + test.Run(); +} + +TEST(MathOpTest, Sub_float) { OpTester test("Sub"); std::vector dims{3, 3}; std::initializer_list lhs_values{1.0f, 2.0f, -1.0f, 0.0f, 1.5f, -100.0f, -5.4f, 9.3f, -10000.0f}; @@ -601,6 +717,15 @@ TEST(MathOpTest, Sub) { #endif } +TEST(MathOpTest, Sub_double) { + OpTester test("Sub"); + std::vector dims{3, 3}; + test.AddInput("A", dims, {1.0, 2.0, -1.0, 0.0, 1.5, -100.0, -5.4, 9.3, -10000.0}); + test.AddInput("B", dims, {-1.0, 4.4, 432.3, 0.0, 3.5, 64.0, -5.4, 9.3, 10000.0}); + test.AddOutput("C", dims, {2.0, -2.4, -433.3, 0.0, -2.0, -164.0, 0.0, 0.0, -20000.0}); + test.Run(); +} + TEST(MathOpTest, Sub_Broadcast_Scalar) { auto run = [](bool scalar_as_initializer) { OpTester test("Sub"); @@ -621,6 +746,32 @@ TEST(MathOpTest, Sub_Broadcast_Scalar) { run(true); } +TEST(MathOpTest, Mul_int8) { + OpTester test("Mul", 14); + test.AddInput("A", {3}, {1, 2, 3}); + test.AddInput("B", {3}, {4, -3, 6}); + test.AddOutput("C", {3}, {4, -6, 18}); + // Only run this test with CPU and Cuda EPs because on TensorRT EP engine creation fails. 
+ if (nullptr != DefaultCpuExecutionProvider().get()) { + std::vector> execution_providers; + execution_providers.push_back(DefaultCpuExecutionProvider()); + test.Run(OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, &execution_providers); + } + if (nullptr != DefaultCudaExecutionProvider().get()) { + std::vector> execution_providers; + execution_providers.push_back(DefaultCudaExecutionProvider()); + test.Run(OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, &execution_providers); + } +} + +TEST(MathOpTest, Mul_int16) { + OpTester test("Mul", 14); + test.AddInput("A", {3}, {3, 6, -3}); + test.AddInput("B", {3}, {4, -3, -2}); + test.AddOutput("C", {3}, {12, -18, 6}); + test.Run(); +} + TEST(MathOpTest, Mul_int32) { OpTester test("Mul"); test.AddInput("A", {3}, {1, 2, 3}); @@ -637,10 +788,42 @@ TEST(MathOpTest, Mul_int64) { test.Run(); } -TEST(MathOpTest, Mul) { +TEST(MathOpTest, Mul_uint8) { + OpTester test("Mul", 14); + test.AddInput("A", {3}, {1, 2, 3}); + test.AddInput("B", {3}, {4, 3, 6}); + test.AddOutput("C", {3}, {4, 6, 18}); + test.Run(); +} + +TEST(MathOpTest, Mul_uint16) { + OpTester test("Mul", 14); + test.AddInput("A", {3}, {3, 6, 3}); + test.AddInput("B", {3}, {4, 3, 2}); + test.AddOutput("C", {3}, {12, 18, 6}); + test.Run(); +} + +TEST(MathOpTest, Mul_uint32) { + OpTester test("Mul"); + test.AddInput("A", {3}, {1, 2, 3}); + test.AddInput("B", {3}, {4, 3, 6}); + test.AddOutput("C", {3}, {4, 6, 18}); + test.Run(); +} + +TEST(MathOpTest, Mul_uint64) { + OpTester test("Mul"); + test.AddInput("A", {3}, {3, 6, 3}); + test.AddInput("B", {3}, {4, 3, 2}); + test.AddOutput("C", {3}, {12, 18, 6}); + test.Run(); +} + +TEST(MathOpTest, Mul_float) { OpTester test("Mul"); std::vector dims{3, 3}; - std::initializer_list lhs_values{1.0f, 2.0f, -1.0f, 0.0f, 1.5f, -100.0f, -5.0f, 9.30f, -10000.0f}; + std::initializer_list lhs_values{1.0f, 2.0f, -1.0f, 0.0f, 1.5f, -100.0f, -5.0f, 9.3f, -10000.0f}; std::initializer_list rhs_values{-1.0f, 4.4f, 432.3f, 0.0f, 3.5f, 64.0f, -5.4f, 9.0f, 10000.0f}; std::initializer_list out_values{-1.0f, 8.8f, -432.3f, 0.0f, 5.25f, -6400.0f, 27.0f, 83.7f, -100000000.0f}; test.AddInput("A", dims, lhs_values); @@ -656,6 +839,41 @@ TEST(MathOpTest, Mul) { #endif } +TEST(MathOpTest, Mul_double) { + OpTester test("Mul"); + test.AddInput("A", {3, 3}, {1.0, 2.0, -1.0, 0.0, 1.5, -100.0, -5.0, 9.3, -10000.0}); + test.AddInput("B", {3, 3}, {-1.0, 4.4, 432.3, 0.0, 3.5, 64.0, -5.4, 9.0, 10000.0}); + test.AddOutput("C", {3, 3}, {-1.0, 8.8, -432.3, 0.0, 5.25, -6400.0, 27.0, 83.7, -100000000.0}); + test.Run(); +} + +TEST(MathOpTest, Div_int8) { + OpTester test("Div", 14); + test.AddInput("A", {3}, {4, 8, 8}); + test.AddInput("B", {3}, {1, -3, 2}); + test.AddOutput("C", {3}, {4, -2, 4}); + // Only run this test with CPU and Cuda EPs because on TensorRT EP engine creation fails and on + // DNNL EP the result for (8 / -3) is -3 instead of -2. 
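+ // (Note: C++ integer division truncates toward zero, so 8 / -3 == -2 here; DNNL appears to
+ // round to nearest instead, which yields -3.)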
+ if (nullptr != DefaultCpuExecutionProvider().get()) { + std::vector> execution_providers; + execution_providers.push_back(DefaultCpuExecutionProvider()); + test.Run(OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, &execution_providers); + } + if (nullptr != DefaultCudaExecutionProvider().get()) { + std::vector> execution_providers; + execution_providers.push_back(DefaultCudaExecutionProvider()); + test.Run(OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, &execution_providers); + } +} + +TEST(MathOpTest, Div_int16) { + OpTester test("Div", 14); + test.AddInput("A", {3}, {4, 8, -8}); + test.AddInput("B", {3}, {2, 3, 4}); + test.AddOutput("C", {3}, {2, 2, -2}); + test.Run(); +} + TEST(MathOpTest, Div_int32) { OpTester test("Div"); test.AddInput("A", {3}, {4, 8, 8}); @@ -674,7 +892,49 @@ TEST(MathOpTest, Div_int64) { test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider, kOpenVINOExecutionProvider}); // TensorRT parser:elementwise inputs must not be Int32 } -TEST(MathOpTest, Div) { +TEST(MathOpTest, Div_uint8) { + OpTester test("Div", 14); + test.AddInput("A", {3}, {4, 8, 8}); + test.AddInput("B", {3}, {1, 3, 2}); + test.AddOutput("C", {3}, {4, 2, 4}); + // Only run this test with CPU and Cuda EPs because on DNNL EP the result for (8 / 3) is 3 instead of 2. + if (nullptr != DefaultCpuExecutionProvider().get()) { + std::vector> execution_providers; + execution_providers.push_back(DefaultCpuExecutionProvider()); + test.Run(OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, &execution_providers); + } + if (nullptr != DefaultCudaExecutionProvider().get()) { + std::vector> execution_providers; + execution_providers.push_back(DefaultCudaExecutionProvider()); + test.Run(OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, &execution_providers); + } +} + +TEST(MathOpTest, Div_uint16) { + OpTester test("Div", 14); + test.AddInput("A", {3}, {4, 8, 8}); + test.AddInput("B", {3}, {2, 3, 4}); + test.AddOutput("C", {3}, {2, 2, 2}); + test.Run(); +} + +TEST(MathOpTest, Div_uint32) { + OpTester test("Div"); + test.AddInput("A", {3}, {4, 8, 8}); + test.AddInput("B", {3}, {1, 3, 2}); + test.AddOutput("C", {3}, {4, 2, 4}); + test.Run(); +} + +TEST(MathOpTest, Div_uint64) { + OpTester test("Div"); + test.AddInput("A", {3}, {4, 8, 8}); + test.AddInput("B", {3}, {2, 3, 4}); + test.AddOutput("C", {3}, {2, 2, 2}); + test.Run(); +} + +TEST(MathOpTest, Div_float) { OpTester test("Div"); std::vector dims{2, 3}; std::initializer_list lhs_values{1000.0f, 1.0f, 6.0f, 0.0f, -10.0f, -1.0f}; @@ -692,6 +952,14 @@ #endif } +TEST(MathOpTest, Div_double) { + OpTester test("Div"); + test.AddInput("A", {2, 3}, {1000.0, 1.0, 6.0, 0.0, -10.0, -1.0}); + test.AddInput("B", {2, 3}, {1000.0, 2.0, 3.0, 1.0, -1.0, 4.0}); + test.AddOutput("C", {2, 3}, {1.0, 0.5, 2.0, 0.0, 10.0, -0.25}); + test.Run(); +} + TEST(MathOpTest, Abs) { OpTester test("Abs"); std::vector dims{2, 2}; @@ -732,7 +1000,7 @@ TEST(MathOpTest, Abs_int32) { test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); // TensorRT parser: Int32 not allowed as input to this layer } -TEST(MathOpTest, Neg) { +TEST(MathOpTest, Neg_float) { OpTester test("Neg"); std::vector dims{2, 2}; test.AddInput("X", dims, @@ -744,6 +1012,18 @@ test.Run(); } +TEST(MathOpTest, Neg_double) { + OpTester test("Neg"); + std::vector dims{2, 2}; + test.AddInput("X", dims, + {1.0, -2.0, + 0.0, -10.0}); + test.AddOutput("Y", dims, + {-1.0, 2.0, + -0.0, 10.0}); + test.Run(); +}
+ TEST(MathOpTest, Neg_int8) { OpTester test("Neg"); std::vector dims{4}; @@ -754,6 +1034,14 @@ TEST(MathOpTest, Neg_int8) { test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); // TensorRT: INT8 is not supported } +TEST(MathOpTest, Neg_int16) { + OpTester test("Neg"); + std::vector dims{4}; + test.AddInput("X", dims, {1, -2, 0, -10}); + test.AddOutput("Y", dims, {-1, 2, 0, 10}); + test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); // TensorRT: Int16 not allowed as input to this layer +} + TEST(MathOpTest, Neg_int32) { OpTester test("Neg"); std::vector dims{4}; diff --git a/onnxruntime/test/providers/cpu/nn/pool_op_test.cc b/onnxruntime/test/providers/cpu/nn/pool_op_test.cc index a340f975ec91a..24a8c8491b632 100644 --- a/onnxruntime/test/providers/cpu/nn/pool_op_test.cc +++ b/onnxruntime/test/providers/cpu/nn/pool_op_test.cc @@ -1030,6 +1030,31 @@ TEST(PoolTest, AveragePool_19_dilation_2d) { kTensorrtExecutionProvider, kAclExecutionProvider, kOpenVINOExecutionProvider}); } +TEST(PoolTest, AveragePool_19_ceil_count_include_pad_1d) { + // TODO: Unskip when fixed #41968513 + if (DefaultDmlExecutionProvider().get() != nullptr) { + GTEST_SKIP() << "Skipping because of the following error: MLOperatorAuthorImpl.cpp(2100): The parameter is incorrect."; + } + + OpTester test("AveragePool", 19); + + test.AddAttribute("auto_pad", ""); + test.AddAttribute("strides", std::vector{3}); + test.AddAttribute("pads", vector{3, 3}); + test.AddAttribute("kernel_shape", vector{7}); + test.AddAttribute("ceil_mode", (int64_t)1); + test.AddAttribute("count_include_pad", (int64_t)1); + + std::vector x_vals = {2.0903f, 4.6493f, 1.6320f, -3.2051f, 4.6975f, 4.7296f, 3.3653f, -1.5815f, -2.3832f, 0.9628f, -1.5899f, -2.6820f, 5.7529f, 7.7346f, -0.8910f, -2.0151f, 0.1313f, -0.5374f}; + std::vector x_dims = {1, 2, 9}; + std::vector expected_dims = {1, 2, 4}; + std::vector expected_vals = {0.73807144f, 2.5655572f, 0.8032287f, -0.09990001f, 0.34911433f, 1.0389f, 1.4536142f, -0.40353334f}; + + test.AddInput("X", x_dims, x_vals); + test.AddOutput("Y", expected_dims, expected_vals); + test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider, kAclExecutionProvider, kOpenVINOExecutionProvider}); +} + TEST(PoolTest, GlobalAveragePool) { OpTester test("GlobalAveragePool"); diff --git a/onnxruntime/test/providers/cpu/rnn/LSTM.py b/onnxruntime/test/providers/cpu/rnn/LSTM.py index 49e28a93385a4..472fa5f844ac0 100644 --- a/onnxruntime/test/providers/cpu/rnn/LSTM.py +++ b/onnxruntime/test/providers/cpu/rnn/LSTM.py @@ -2,13 +2,7 @@ # Licensed under the MIT License. -from typing import Any, Tuple # noqa: F401 - -import numpy as np # type: ignore - -# import onnx -# from ..base import Base -# from . 
import expect +import numpy as np DebugOutput = True np.set_printoptions(suppress=True) # , precision=16, floatmode='maxprec') diff --git a/onnxruntime/test/providers/cpu/tensor/affine_grid_test_gen.py b/onnxruntime/test/providers/cpu/tensor/affine_grid_test_gen.py index 7dcd6484a5688..5276b70789db1 100644 --- a/onnxruntime/test/providers/cpu/tensor/affine_grid_test_gen.py +++ b/onnxruntime/test/providers/cpu/tensor/affine_grid_test_gen.py @@ -24,7 +24,7 @@ test_count = 0 for align_corners in align_corners_options: - for angle, translation, scale in zip(angles, translations, scales): + for angle, translation, scale in zip(angles, translations, scales, strict=False): for size in sizes: theta = np.array([], dtype=np.float32) for _ in range(size[0]): @@ -49,13 +49,13 @@ print(f' OpTester test("AffineGrid", {opset_version});') print(f' test.AddAttribute("align_corners", (int64_t){1 if align_corners else 0});') print( - f" test.AddInput(\"theta\", {{{theta.shape[0]}, {theta.shape[1]}, {theta.shape[2]}}}, {{{', '.join([f'{x:.6f}f' for x in theta.flatten()])}}});" + f' test.AddInput("theta", {{{theta.shape[0]}, {theta.shape[1]}, {theta.shape[2]}}}, {{{", ".join([f"{x:.6f}f" for x in theta.flatten()])}}});' ) print( f' test.AddInput("size", {{{len(size)}}}, {{{size[0]}, {size[1]}, {size[2]}, {size[3]}}});' ) print( - f" test.AddOutput(\"grid\", {{{size[0]}, {size[2]}, {size[3]}, 2}}, {{{', '.join([f'{x:.4f}f' for x in grid.flatten()])}}});" + f' test.AddOutput("grid", {{{size[0]}, {size[2]}, {size[3]}, 2}}, {{{", ".join([f"{x:.4f}f" for x in grid.flatten()])}}});' ) print(" test.Run();") print("}\n") @@ -71,7 +71,7 @@ test_count = 0 for align_corners in align_corners_options: - for angle, translation, scale in zip(angles, translations, scales): + for angle, translation, scale in zip(angles, translations, scales, strict=False): for size in sizes: theta = np.array([], dtype=np.float32) for _ in range(size[0]): @@ -104,13 +104,13 @@ print(f' OpTester test("AffineGrid", {opset_version});') print(f' test.AddAttribute("align_corners", (int64_t){1 if align_corners else 0});') print( - f" test.AddInput(\"theta\", {{{theta.shape[0]}, {theta.shape[1]}, {theta.shape[2]}}}, {{{', '.join([f'{x:.6f}f' for x in theta.flatten()])}}});" + f' test.AddInput("theta", {{{theta.shape[0]}, {theta.shape[1]}, {theta.shape[2]}}}, {{{", ".join([f"{x:.6f}f" for x in theta.flatten()])}}});' ) print( f' test.AddInput("size", {{{len(size)}}}, {{{size[0]}, {size[1]}, {size[2]}, {size[3]}, {size[4]}}});' ) print( - f" test.AddOutput(\"grid\", {{{size[0]}, {size[2]}, {size[3]}, {size[4]}, 3}}, {{{', '.join([f'{x:.4f}f' for x in grid.flatten()])}}});" + f' test.AddOutput("grid", {{{size[0]}, {size[2]}, {size[3]}, {size[4]}, 3}}, {{{", ".join([f"{x:.4f}f" for x in grid.flatten()])}}});' ) print(" test.Run();") print("}\n") diff --git a/onnxruntime/test/providers/cpu/tensor/grid_sample_test_gen.py b/onnxruntime/test/providers/cpu/tensor/grid_sample_test_gen.py index bf58a5d3fc1d5..627b681793657 100644 --- a/onnxruntime/test/providers/cpu/tensor/grid_sample_test_gen.py +++ b/onnxruntime/test/providers/cpu/tensor/grid_sample_test_gen.py @@ -80,11 +80,11 @@ class GridSampleTest : public ::testing::Test { print(f'{spaces}std::string padding_mode = "{padding_mode}";') print(f"{spaces}int64_t align_corners = {onnx_align_corners};") print(f"{spaces}std::initializer_list X_shape {{ {', '.join(map(str, input_shape))} }};") - print(f"{spaces}std::initializer_list X_data { X_data_str };") + print(f"{spaces}std::initializer_list X_data 
{X_data_str};") print(f"{spaces}std::initializer_list Grid_shape {{ {', '.join(map(str, grid_shape))} }};") - print(f"{spaces}std::initializer_list Grid_data { Grid_data_str };") + print(f"{spaces}std::initializer_list Grid_data {Grid_data_str};") print(f"{spaces}std::initializer_list Y_shape {{ {', '.join(map(str, Y_shape))} }};") - print(f"{spaces}std::initializer_list Y_data { Y_data_str };") + print(f"{spaces}std::initializer_list Y_data {Y_data_str};") print(f'{spaces}test.AddInput("X", X_shape, X_data);') print(f'{spaces}test.AddInput("Grid", Grid_shape, Grid_data);') diff --git a/onnxruntime/test/providers/cpu/tensor/slice_op.test.cc b/onnxruntime/test/providers/cpu/tensor/slice_op.test.cc index 2169436255727..dcbb953a2e05a 100644 --- a/onnxruntime/test/providers/cpu/tensor/slice_op.test.cc +++ b/onnxruntime/test/providers/cpu/tensor/slice_op.test.cc @@ -352,6 +352,9 @@ TEST(SliceTest, Slice1D_WithNegativeSteps_EndOutOfBounds_1) { } TEST(SliceTest, Slice1D_WithNegativeSteps_EndOutOfBounds_2) { + if (DefaultWebGpuExecutionProvider().get() != nullptr) { + GTEST_SKIP() << "Not covered by WebGPU test suite"; + } RunSliceTest({6}, {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f}, {0}, @@ -536,6 +539,9 @@ TEST(SliceTest, Slice1D_ReverseAllAxes_1) { if (DefaultVSINPUExecutionProvider().get() != nullptr) { GTEST_SKIP() << "Skipping because of the following error: Expected output shape [{4}] did not match run output shape [{0}] for output"; } + if (DefaultWebGpuExecutionProvider().get() != nullptr) { + GTEST_SKIP() << "Not covered by WebGPU test suite"; + } RunSliceTest({4}, {1.0f, 2.0f, 3.0f, 4.0f}, @@ -550,6 +556,9 @@ TEST(SliceTest, Slice1D_ReverseAllAxes_1) { // With numeric_limit_min, the end value should be clamped to -1 TEST(SliceTest, Slice1D_ReverseAllAxes_2) { + if (DefaultWebGpuExecutionProvider().get() != nullptr) { + GTEST_SKIP() << "Not covered by WebGPU test suite"; + } RunSliceTest({4}, {1.0f, 2.0f, 3.0f, 4.0f}, {-1}, @@ -563,6 +572,9 @@ TEST(SliceTest, Slice1D_ReverseAllAxes_2) { // giving an end value < -{dim_value} should also clamp it to -1 TEST(SliceTest, Slice1D_ReverseAllAxes_3) { + if (DefaultWebGpuExecutionProvider().get() != nullptr) { + GTEST_SKIP() << "Not covered by WebGPU test suite"; + } RunSliceTest({4}, {1.0f, 2.0f, 3.0f, 4.0f}, {-1}, @@ -579,6 +591,9 @@ TEST(SliceTest, Slice2D_ReverseAllAxes) { if (DefaultDmlExecutionProvider().get() != nullptr) { GTEST_SKIP() << "Skipping because of the following error: Expected output shape [{4}] did not match run output shape [{0}] for output"; } + if (DefaultWebGpuExecutionProvider().get() != nullptr) { + GTEST_SKIP() << "Not covered by WebGPU test suite"; + } RunSliceTest({2, 2}, {1.0f, 2.0f, 3.0f, 4.0f}, @@ -596,6 +611,9 @@ TEST(SliceTest, Slice2D_ReverseSubsetOfAxes_1) { if (DefaultDmlExecutionProvider().get() != nullptr) { GTEST_SKIP() << "Skipping because of the following error: MLOperatorAuthorImpl.cpp(2100): The parameter is incorrect."; } + if (DefaultWebGpuExecutionProvider().get() != nullptr) { + GTEST_SKIP() << "Not covered by WebGPU test suite"; + } RunSliceTest({2, 2}, {1.0f, 2.0f, 3.0f, 4.0f}, @@ -613,6 +631,9 @@ TEST(SliceTest, Slice2D_ReverseSubsetOfAxes_2) { if (DefaultDmlExecutionProvider().get() != nullptr) { GTEST_SKIP() << "Skipping because of the following error: Expected output shape [{2,2}] did not match run output shape [{0,2}] for output"; } + if (DefaultWebGpuExecutionProvider().get() != nullptr) { + GTEST_SKIP() << "Not covered by WebGPU test suite"; + } RunSliceTest({2, 2}, {1.0f, 2.0f, 3.0f, 4.0f}, @@ 
-667,6 +688,9 @@ TEST(SliceTest, Slice2D_ReverseSubsetOfNegAxes_1) { if (DefaultDmlExecutionProvider().get() != nullptr) { GTEST_SKIP() << "Skipping because of the following error: Expected output shape [{2,2}] did not match run output shape [{2,0}] for output"; } + if (DefaultWebGpuExecutionProvider().get() != nullptr) { + GTEST_SKIP() << "Not covered by WebGPU test suite"; + } RunSliceTest({2, 2}, {1.0f, 2.0f, 3.0f, 4.0f}, diff --git a/onnxruntime/test/providers/cpu/tensor/space_depth_ops_test.cc b/onnxruntime/test/providers/cpu/tensor/space_depth_ops_test.cc index 4954b82690e0f..d0620a794e4d5 100644 --- a/onnxruntime/test/providers/cpu/tensor/space_depth_ops_test.cc +++ b/onnxruntime/test/providers/cpu/tensor/space_depth_ops_test.cc @@ -13,7 +13,7 @@ template class TensorOpTest : public ::testing::Test { }; -using TensorOpTestTypes = ::testing::Types<float, MLFloat16>; +using TensorOpTestTypes = ::testing::Types<float, MLFloat16, uint8_t>; TYPED_TEST_SUITE(TensorOpTest, TensorOpTestTypes); TEST(TensorOpTest, SpaceToDepthTest_1) { @@ -224,6 +224,7 @@ TEST(TensorOpTest, DepthToSpaceTest_1_double) { test.AddOutput("output", {N, C / (blocksize * blocksize), H * blocksize, W * blocksize}, result); test.Run(); } + TEST(TensorOpTest, DepthToSpaceTest_2) { OpTester test("DepthToSpace", 7); // create an opset 7 model constexpr int64_t blocksize = 2; @@ -308,14 +309,24 @@ TYPED_TEST(TensorOpTest, DepthToSpaceTest_3) { if constexpr (std::is_same::value) { test.AddInput("input", {N, C, H, W}, X); test.AddOutput("output", {2, 3, 6, 4}, result); - } else { + } else if constexpr (std::is_same::value) { std::vector X_fp16(X.size()); std::vector result_fp16(result.size()); - ConvertFloatToMLFloat16(result.data(), result_fp16.data(), result.size()); ConvertFloatToMLFloat16(X.data(), X_fp16.data(), X.size()); - test.AddOutput("output", {2, 3, 6, 4}, result_fp16); + ConvertFloatToMLFloat16(result.data(), result_fp16.data(), result.size()); test.AddInput("input", {N, C, H, W}, X_fp16); + test.AddOutput("output", {2, 3, 6, 4}, result_fp16); + } else if constexpr (std::is_same::value) { + std::vector X_u8(X.size()); + std::vector result_u8(result.size()); + ConvertFloatToUint8_t(X.data(), X_u8.data(), X.size()); + ConvertFloatToUint8_t(result.data(), result_u8.data(), result.size()); + test.AddInput("input", {N, C, H, W}, X_u8); + test.AddOutput("output", {2, 3, 6, 4}, result_u8); + } else { + ORT_THROW("Type not supported"); } + // TODO: Test is flaky on QNN EP (CPU backend). // Re-enable when the QnnCPUBackendTests.DISABLED_SpaceToDepth_Flaky test is fixed.
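// (In test.Run() below, {kQnnExecutionProvider} is the set of excluded provider types: the test
// still runs on the remaining EPs while QNN EP is skipped.)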
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kQnnExecutionProvider}); @@ -363,13 +374,22 @@ TYPED_TEST(TensorOpTest, DepthToSpaceTest_4) { if constexpr (std::is_same::value) { test.AddInput("input", {N, C, H, W}, X); test.AddOutput("output", {2, 3, 6, 4}, result); - } else { + } else if constexpr (std::is_same::value) { std::vector X_fp16(X.size()); std::vector result_fp16(result.size()); ConvertFloatToMLFloat16(X.data(), X_fp16.data(), X.size()); ConvertFloatToMLFloat16(result.data(), result_fp16.data(), result.size()); test.AddInput("input", {N, C, H, W}, X_fp16); test.AddOutput("output", {2, 3, 6, 4}, result_fp16); + } else if constexpr (std::is_same::value) { + std::vector X_u8(X.size()); + std::vector result_u8(result.size()); + ConvertFloatToUint8_t(X.data(), X_u8.data(), X.size()); + ConvertFloatToUint8_t(result.data(), result_u8.data(), result.size()); + test.AddInput("input", {N, C, H, W}, X_u8); + test.AddOutput("output", {2, 3, 6, 4}, result_u8); + } else { + ORT_THROW("Type not supported"); } // TODO: Test is flaky on QNN EP (CPU backend). @@ -401,14 +421,24 @@ TYPED_TEST(TensorOpTest, DepthToSpaceTest_5) { if constexpr (std::is_same::value) { test.AddInput("input", {N, C, H, W}, X); test.AddOutput("output", {1, 1, 4, 6}, result); - } else { + } else if constexpr (std::is_same::value) { std::vector X_fp16(X.size()); std::vector result_fp16(result.size()); ConvertFloatToMLFloat16(X.data(), X_fp16.data(), X.size()); ConvertFloatToMLFloat16(result.data(), result_fp16.data(), result.size()); test.AddInput("input", {N, C, H, W}, X_fp16); test.AddOutput("output", {1, 1, 4, 6}, result_fp16); + } else if constexpr (std::is_same::value) { + std::vector X_u8(X.size()); + std::vector result_u8(result.size()); + ConvertFloatToUint8_t(X.data(), X_u8.data(), X.size()); + ConvertFloatToUint8_t(result.data(), result_u8.data(), result.size()); + test.AddInput("input", {N, C, H, W}, X_u8); + test.AddOutput("output", {1, 1, 4, 6}, result_u8); + } else { + ORT_THROW("Type not supported"); } + // TODO: Test is flaky on QNN EP (CPU backend). // Re-enable when the QnnCPUBackendTests.DISABLED_SpaceToDepth_Flaky2 test is fixed. 
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kQnnExecutionProvider}); diff --git a/onnxruntime/test/providers/provider_test_utils.h b/onnxruntime/test/providers/provider_test_utils.h index 87dc5980e9e50..988df1e7a5d3b 100644 --- a/onnxruntime/test/providers/provider_test_utils.h +++ b/onnxruntime/test/providers/provider_test_utils.h @@ -15,6 +15,12 @@ inline void ConvertFloatToMLFloat16(const float* f_datat, MLFloat16* h_data, siz output_vector = in_vector.template cast(); } +inline void ConvertFloatToUint8_t(const float* f_datat, uint8_t* u8_data, size_t input_size) { + auto in_vector = ConstEigenVectorMap(f_datat, input_size); + auto output_vector = EigenVectorMap(static_cast(static_cast(u8_data)), input_size); + output_vector = in_vector.template cast(); +} + inline void ConvertMLFloat16ToFloat(const MLFloat16* h_data, float* f_data, size_t input_size) { auto in_vector = ConstEigenVectorMap(static_cast(static_cast(h_data)), input_size); diff --git a/onnxruntime/test/providers/qnn/argmaxmin_op_test.cc b/onnxruntime/test/providers/qnn/argmaxmin_op_test.cc index c514cf16b2f3c..da6eda1317778 100644 --- a/onnxruntime/test/providers/qnn/argmaxmin_op_test.cc +++ b/onnxruntime/test/providers/qnn/argmaxmin_op_test.cc @@ -9,7 +9,7 @@ #include "core/graph/node_attr_utils.h" #include "test/providers/qnn/qnn_test_utils.h" -#include "onnx/onnx_pb.h" +#include "core/graph/onnx_protobuf.h" #include "gtest/gtest.h" namespace onnxruntime { diff --git a/onnxruntime/test/providers/qnn/average_pool_test.cc b/onnxruntime/test/providers/qnn/average_pool_test.cc index 1a0f9bfcbae97..f897a08da6b2e 100644 --- a/onnxruntime/test/providers/qnn/average_pool_test.cc +++ b/onnxruntime/test/providers/qnn/average_pool_test.cc @@ -11,7 +11,7 @@ #include "test/optimizer/qdq_test_utils.h" #include "test/providers/qnn/qnn_test_utils.h" -#include "onnx/onnx_pb.h" +#include "core/graph/onnx_protobuf.h" #include "gtest/gtest.h" diff --git a/onnxruntime/test/providers/qnn/cast_test.cc b/onnxruntime/test/providers/qnn/cast_test.cc index 9b83dd281a56d..e2e4b0d714e54 100644 --- a/onnxruntime/test/providers/qnn/cast_test.cc +++ b/onnxruntime/test/providers/qnn/cast_test.cc @@ -9,7 +9,7 @@ #include "test/optimizer/qdq_test_utils.h" #include "test/providers/qnn/qnn_test_utils.h" -#include "onnx/onnx_pb.h" +#include "core/graph/onnx_protobuf.h" #include "gtest/gtest.h" diff --git a/onnxruntime/test/providers/qnn/clip_op_test.cc b/onnxruntime/test/providers/qnn/clip_op_test.cc index cfa77a46210b3..21bd6fcc98d74 100644 --- a/onnxruntime/test/providers/qnn/clip_op_test.cc +++ b/onnxruntime/test/providers/qnn/clip_op_test.cc @@ -8,7 +8,7 @@ #include "test/providers/qnn/qnn_test_utils.h" #include "core/graph/node_attr_utils.h" -#include "onnx/onnx_pb.h" +#include "core/graph/onnx_protobuf.h" #include "gtest/gtest.h" namespace onnxruntime { diff --git a/onnxruntime/test/providers/qnn/conv_test.cc b/onnxruntime/test/providers/qnn/conv_test.cc index cf37fc00335d3..446e6eab6b1a9 100644 --- a/onnxruntime/test/providers/qnn/conv_test.cc +++ b/onnxruntime/test/providers/qnn/conv_test.cc @@ -1136,7 +1136,12 @@ TEST_F(QnnHTPBackendTests, Conv_PerChannel_UnsupportedAxis) { // QnnDsp Wake up free backend 1 thread(s) // QnnDsp QnnGraph_finalize done. status 0x3ea // onnxruntime::qnn::QnnModel::FinalizeGraphs] Failed to finalize QNN graph. 
+// Issue fixed in 2.30 +#if (QNN_API_VERSION_MAJOR == 2) && (QNN_API_VERSION_MINOR >= 23) +TEST_F(QnnHTPBackendTests, Conv3D_U8S8S32_PerChannel) { +#else TEST_F(QnnHTPBackendTests, DISABLED_Conv3D_U8S8S32_PerChannel) { +#endif std::vector input_shape = {1, 2, 4, 4, 4}; std::vector weight_shape = {3, 2, 2, 2, 2}; std::vector bias_shape = {3}; @@ -1201,7 +1206,12 @@ TEST_F(QnnHTPBackendTests, ConvDepthwiseU8S8S32_PerChannel) { // QnnDsp Wake up free backend 1 thread(s) // QnnDsp QnnGraph_finalize done. status 0x3ea // onnxruntime::qnn::QnnModel::FinalizeGraphs] Failed to finalize QNN graph. +// Issue fixed in 2.30 +#if (QNN_API_VERSION_MAJOR == 2) && (QNN_API_VERSION_MINOR >= 23) +TEST_F(QnnHTPBackendTests, Conv3D_U8S8S32_PerChannel2) { +#else TEST_F(QnnHTPBackendTests, DISABLED_Conv3D_U8S8S32_PerChannel2) { +#endif std::vector input_shape = {1, 2, 4, 4, 4}; std::vector weight_shape = {2, 1, 2, 2, 2}; std::vector bias_shape = {2}; @@ -1286,7 +1296,12 @@ TEST_F(QnnHTPBackendTests, ConvTranspose_PerChannel_UnsupportedAxis) { // ConvTranspose3D per-channel // Disable it for 2.21 since it failed, re-enabled it for 2.22 +// Issue fixed in 2.30 +#if (QNN_API_VERSION_MAJOR == 2) && (QNN_API_VERSION_MINOR >= 23) +TEST_F(QnnHTPBackendTests, ConvTranspose3D_U8S8S32_PerChannel) { +#else TEST_F(QnnHTPBackendTests, DISABLED_ConvTranspose3D_U8S8S32_PerChannel) { +#endif std::vector input_shape = {1, 2, 4, 4, 4}; std::vector weight_shape = {2, 3, 2, 2, 2}; std::vector bias_shape = {3}; @@ -1350,7 +1365,12 @@ TEST_F(QnnHTPBackendTests, ConvU16S8S32_PerChannel) { // QnnDsp Wake up free backend 1 thread(s) // QnnDsp QnnGraph_finalize done. status 0x3ea // onnxruntime::qnn::QnnModel::FinalizeGraphs] Failed to finalize QNN graph. +// Issue fixed in 2.30 +#if (QNN_API_VERSION_MAJOR == 2) && (QNN_API_VERSION_MINOR >= 23) +TEST_F(QnnHTPBackendTests, Conv3D_U16S8S32_PerChannel) { +#else TEST_F(QnnHTPBackendTests, DISABLED_Conv3D_U16S8S32_PerChannel) { +#endif std::vector input_shape = {1, 2, 4, 4, 4}; std::vector weight_shape = {3, 2, 2, 2, 2}; std::vector bias_shape = {3}; @@ -1406,7 +1426,12 @@ TEST_F(QnnHTPBackendTests, ConvTransposeU16S8S32_PerChannel) { } // Disable it for 2.21, re-enable it for 2.22 +// Issue fixed in 2.30 +#if (QNN_API_VERSION_MAJOR == 2) && (QNN_API_VERSION_MINOR >= 23) +TEST_F(QnnHTPBackendTests, ConvTranspose3D_U16S8S32_PerChannel) { +#else TEST_F(QnnHTPBackendTests, DISABLED_ConvTranspose3D_U16S8S32_PerChannel) { +#endif std::vector input_shape = {1, 2, 4, 4, 4}; std::vector weight_shape = {2, 3, 2, 2, 2}; std::vector bias_shape = {3}; @@ -1471,7 +1496,12 @@ TEST_F(QnnHTPBackendTests, ConvDepthwiseU16S8S32_PerChannel) { // QnnDsp Wake up free backend 1 thread(s) // QnnDsp QnnGraph_finalize done. status 0x3ea // onnxruntime::qnn::QnnModel::FinalizeGraphs] Failed to finalize QNN graph. +// Issue fixed in 2.30 +#if (QNN_API_VERSION_MAJOR == 2) && (QNN_API_VERSION_MINOR >= 23) +TEST_F(QnnHTPBackendTests, Conv3D_U16S8S32_PerChannel2) { +#else TEST_F(QnnHTPBackendTests, DISABLED_Conv3D_U16S8S32_PerChannel2) { +#endif std::vector input_shape = {1, 2, 4, 4, 4}; std::vector weight_shape = {2, 1, 2, 2, 2}; std::vector bias_shape = {2}; @@ -1824,7 +1854,12 @@ TEST_F(QnnHTPBackendTests, ConvTransposeU8U8S32_DynamicWeight_NoBias) { // Exception from backendValidateOpConfig: // Exception thrown at 0x00007FFF9E0128B0 (QnnHtpPrepare.dll) in onnxruntime_test_all.exe: // 0xC0000005: Access violation reading location 0x7079745F656C706D. 
+// Issue fixed in 2.30 +#if (QNN_API_VERSION_MAJOR == 2) && (QNN_API_VERSION_MINOR >= 23) +TEST_F(QnnHTPBackendTests, ConvTranspose3D_U8U8S32_DynamicWeight_NoBias) { +#else TEST_F(QnnHTPBackendTests, DISABLED_ConvTranspose3D_U8U8S32_DynamicWeight_NoBias) { +#endif RunHTPConvOpTest("ConvTranspose", TestInputDef({1, 3, 32, 32, 32}, false, -10.0f, 10.0f), // Input TestInputDef({3, 1, 4, 4, 4}, false, -10.0f, 10.0f), // Weights diff --git a/onnxruntime/test/providers/qnn/flatten_op_test.cc b/onnxruntime/test/providers/qnn/flatten_op_test.cc index 637d3257ddea7..b33f8f9c00fc4 100644 --- a/onnxruntime/test/providers/qnn/flatten_op_test.cc +++ b/onnxruntime/test/providers/qnn/flatten_op_test.cc @@ -8,7 +8,7 @@ #include "test/providers/qnn/qnn_test_utils.h" #include "core/graph/node_attr_utils.h" -#include "onnx/onnx_pb.h" +#include "core/graph/onnx_protobuf.h" #include "gtest/gtest.h" namespace onnxruntime { diff --git a/onnxruntime/test/providers/qnn/gather_elems_op_test.cc b/onnxruntime/test/providers/qnn/gather_elems_op_test.cc index 81c08873064c8..85dc792666827 100644 --- a/onnxruntime/test/providers/qnn/gather_elems_op_test.cc +++ b/onnxruntime/test/providers/qnn/gather_elems_op_test.cc @@ -11,7 +11,7 @@ #include "test/optimizer/qdq_test_utils.h" #include "test/providers/qnn/qnn_test_utils.h" -#include "onnx/onnx_pb.h" +#include "core/graph/onnx_protobuf.h" #include "gtest/gtest.h" diff --git a/onnxruntime/test/providers/qnn/gather_op_htp_test.cc b/onnxruntime/test/providers/qnn/gather_op_htp_test.cc index 21c373232187f..1d6fd53d066cd 100644 --- a/onnxruntime/test/providers/qnn/gather_op_htp_test.cc +++ b/onnxruntime/test/providers/qnn/gather_op_htp_test.cc @@ -149,7 +149,12 @@ TEST_F(QnnHTPBackendTests, GatherOp_IndicesDynamicInt32_Axis0) { // nodes are supported by the QNN EP, and that the inference results are as accurate as CPU EP. 
// // Static int32 indices with axis = 1 +// Issue fixed in 2.30 +#if (QNN_API_VERSION_MAJOR == 2) && (QNN_API_VERSION_MINOR >= 23) +TEST_F(QnnHTPBackendTests, GatherOp_IndicesStaticInt32_Axis1) { +#else TEST_F(QnnHTPBackendTests, DISABLED_GatherOp_IndicesStaticInt32_Axis1) { +#endif RunQDQGatherOpTest(TestInputDef({3, 3}, false, {1.0f, 1.2f, 1.9f, 2.3f, 3.4f, 3.9f, 4.5f, 5.7f, 5.9f}), TestInputDef({1, 2}, true, {0, 2}), {utils::MakeAttribute("axis", static_cast(1))}, diff --git a/onnxruntime/test/providers/qnn/gemm_op_test.cc b/onnxruntime/test/providers/qnn/gemm_op_test.cc index 33c868694c9c0..0c1146ba22360 100644 --- a/onnxruntime/test/providers/qnn/gemm_op_test.cc +++ b/onnxruntime/test/providers/qnn/gemm_op_test.cc @@ -9,7 +9,7 @@ #include "test/providers/qnn/qnn_test_utils.h" #include "core/graph/node_attr_utils.h" -#include "onnx/onnx_pb.h" +#include "core/graph/onnx_protobuf.h" #include "gtest/gtest.h" namespace onnxruntime { @@ -335,7 +335,12 @@ TEST_F(QnnHTPBackendTests, Gemm_Broadcast_Bias_DynamicA_StaticB_StaticC) { // Expected val: 120.73912048339844 // QNN QDQ val: 0 (err 120.73912048339844) // CPU QDQ val: 120.73889923095703 (err 0.00022125244140625) +// Issue fixed in 2.30 +#if (QNN_API_VERSION_MAJOR == 2) && (QNN_API_VERSION_MINOR >= 23) +TEST_F(QnnHTPBackendTests, Gemm_Dynamic_A_Static_B_Dynamic_Bias_U16) { +#else TEST_F(QnnHTPBackendTests, DISABLED_Gemm_Dynamic_A_Static_B_Dynamic_Bias_U16) { +#endif std::vector input_a_data = GetFloatDataInRange(-10.0f, 10.0f, 6); std::vector input_b_data = GetFloatDataInRange(-5.0f, 5.0f, 24); std::vector input_c_data = GetFloatDataInRange(-1.0f, 1.0f, 4); @@ -368,7 +373,12 @@ TEST_F(QnnHTPBackendTests, Gemm_Dynamic_A_Static_B_Dynamic_Bias_U16Act_U8Weight) // Expected val: 120.73912048339844 // QNN QDQ val: 77.012794494628906 (err 43.726325988769531) // CPU QDQ val: 119.85115814208984 (err 0.88796234130859375) +// Issue fixed in 2.30 +#if (QNN_API_VERSION_MAJOR == 2) && (QNN_API_VERSION_MINOR >= 23) +TEST_F(QnnHTPBackendTests, Gemm_Dynamic_A_B_Static_Bias) { +#else TEST_F(QnnHTPBackendTests, DISABLED_Gemm_Dynamic_A_B_Static_Bias) { +#endif std::vector input_a_data = GetFloatDataInRange(-10.0f, 10.0f, 6); std::vector input_b_data = GetFloatDataInRange(-5.0f, 5.0f, 24); std::vector input_c_data = GetFloatDataInRange(-1.0f, 1.0f, 4); diff --git a/onnxruntime/test/providers/qnn/logical_comp_ops_test.cc b/onnxruntime/test/providers/qnn/logical_comp_ops_test.cc index 59105136781f4..522b781379119 100644 --- a/onnxruntime/test/providers/qnn/logical_comp_ops_test.cc +++ b/onnxruntime/test/providers/qnn/logical_comp_ops_test.cc @@ -9,7 +9,7 @@ #include "test/optimizer/qdq_test_utils.h" #include "test/providers/qnn/qnn_test_utils.h" -#include "onnx/onnx_pb.h" +#include "core/graph/onnx_protobuf.h" #include "gtest/gtest.h" diff --git a/onnxruntime/test/providers/qnn/lrn_op_test.cc b/onnxruntime/test/providers/qnn/lrn_op_test.cc index a99cba66bf167..4b26ed0da93c7 100644 --- a/onnxruntime/test/providers/qnn/lrn_op_test.cc +++ b/onnxruntime/test/providers/qnn/lrn_op_test.cc @@ -9,7 +9,7 @@ #include "test/optimizer/qdq_test_utils.h" #include "test/providers/qnn/qnn_test_utils.h" -#include "onnx/onnx_pb.h" +#include "core/graph/onnx_protobuf.h" #include "gtest/gtest.h" diff --git a/onnxruntime/test/providers/qnn/matmul_test.cpp b/onnxruntime/test/providers/qnn/matmul_test.cpp index 74edc25939e00..dec9369b81748 100644 --- a/onnxruntime/test/providers/qnn/matmul_test.cpp +++ b/onnxruntime/test/providers/qnn/matmul_test.cpp @@ -8,7 +8,7 @@ #include 
"test/providers/qnn/qnn_test_utils.h" -#include "onnx/onnx_pb.h" +#include "core/graph/onnx_protobuf.h" #include "gtest/gtest.h" @@ -209,7 +209,14 @@ TEST_F(QnnCPUBackendTests, MatMulOp) { RunMatMulOpTest(false, {3, 3, 3}, {3, 2}, true, false); RunMatMulOpTest(false, {2, 3, 3, 3}, {3, 2}, false, true); RunMatMulOpTest(false, {2, 3, 3, 3}, {2, 3, 3, 2}, false, true); + +#if defined(__linux__) + // TODO: This fails on Linux (HTP emulation). Works on Windows ARM64. + // Expected: contains 24 values, where each value and its corresponding value in 16-byte object <18-00 00-00 00-00 00-00 00-29 4E-53 A8-55 00-00> are an almost-equal pair + // Actual: 16-byte object <18-00 00-00 00-00 00-00 80-28 3E-53 A8-55 00-00>, where the value pair (0.0285999943, 0) at index #12 don't match, which is -0.0286 from 0.0286 +#else RunMatMulOpTest(false, {2, 1, 2, 3}, {3, 3, 2}, false, false); +#endif RunMatMulOpTest(false, {3}, {3}, false, false); RunMatMulOpTest(false, {3}, {3}, false, true); RunMatMulOpTest(false, {3}, {3}, true, false); @@ -233,7 +240,8 @@ TEST_F(QnnCPUBackendTests, MatMulOp) { // // HTP tests: // -TEST_F(QnnHTPBackendTests, MatMulOp) { +// Disable this for now as the QNN HTP backend is not stable on different versions and platforms so it failed randomly. +TEST_F(QnnHTPBackendTests, DISABLED_MatMulOp) { // RunMatMulOpTest(is_htp_backend, shape_0, shape_1, is_initializer_0, is_initializer_1, expected_ep_assignment, // opset, f32_abs_err) RunMatMulOpTest(true, {2, 3}, {3, 2}, false, false, ExpectedEPNodeAssignment::All, 18, 1e-2f); @@ -289,10 +297,65 @@ TEST_F(QnnHTPBackendTests, MatMulOp_QDQ) { RunQDQPerChannelMatMulOpTest({2, 3, 3, 3}, {3, 2}, -1, QDQTolerance(), ExpectedEPNodeAssignment::All, 18, true); - // // UINT16, per-channel INT8 weight + // UINT16, per-channel INT8 weight RunQDQPerChannelMatMulOpTest({2, 3}, {3, 2}, 1, QDQTolerance(), ExpectedEPNodeAssignment::All, 21, false, false); - RunQDQPerChannelMatMulOpTest({2, 3, 3}, {3}, -1); + RunQDQPerChannelMatMulOpTest({2, 3, 3}, {3}, -1, QDQTolerance(0.005f)); +} + +// Tests MatMul with two uint16 (quantized) inputs that are both dynamic. +// This exercises a workaround in QNN EP that inserts a QNN Convert op before input[1] (converts from uint16 to uint8). +// This workaround prevents a validation error for this specific MatMul configuration. +// Got specific shapes and input ranges (quant params) from customer model. 
+TEST_F(QnnHTPBackendTests, MatMulOp_QDQ_Regression_uint16_dynamic_inputs) { + ProviderOptions provider_options; +#if defined(_WIN32) + provider_options["backend_path"] = "QnnHtp.dll"; +#else + provider_options["backend_path"] = "libQnnHtp.so"; +#endif + + // Test with rank 4 inputs + { + std::vector shape_0 = {1, 12, 512, 96}; + TestInputDef input0_def( + {1, 12, 512, 96}, false, + GetFloatDataInRange(-5.087f, 4.992f, + static_cast(std::accumulate(shape_0.begin(), shape_0.end(), static_cast(1), + std::multiplies())))); + std::vector shape_1 = {1, 12, 96, 512}; + TestInputDef input1_def( + shape_1, false, + GetFloatDataInRange(-6.772f, 7.258f, + static_cast(std::accumulate(shape_1.begin(), shape_1.end(), static_cast(1), + std::multiplies())))); + + TestQDQModelAccuracy( + BuildMatMulOpTestCase(input0_def, input1_def), + BuildMatMulOpQDQTestCase(input0_def, input1_def, false), + provider_options, 21, ExpectedEPNodeAssignment::All, QDQTolerance()); + } + + // Test with input[1] as rank 1 + { + std::vector shape_0 = {1, 12, 512, 96}; + TestInputDef input0_def( + {1, 12, 512, 96}, false, + GetFloatDataInRange(-5.087f, 4.992f, + static_cast(std::accumulate(shape_0.begin(), shape_0.end(), static_cast(1), + std::multiplies())))); + std::vector shape_1 = {96}; + TestInputDef input1_def( + shape_1, false, + GetFloatDataInRange(-6.772f, 7.258f, + static_cast(std::accumulate(shape_1.begin(), shape_1.end(), static_cast(1), + std::multiplies())))); + + TestQDQModelAccuracy( + BuildMatMulOpTestCase(input0_def, input1_def), + BuildMatMulOpQDQTestCase(input0_def, input1_def, false), + provider_options, 21, ExpectedEPNodeAssignment::All, QDQTolerance()); + } } #endif // defined(__aarch64__) || defined(_M_ARM64) || defined(__linux__) diff --git a/onnxruntime/test/providers/qnn/max_min_op_test.cc b/onnxruntime/test/providers/qnn/max_min_op_test.cc index 3deff121f3c72..9a45d11b7e34f 100644 --- a/onnxruntime/test/providers/qnn/max_min_op_test.cc +++ b/onnxruntime/test/providers/qnn/max_min_op_test.cc @@ -7,7 +7,7 @@ #include "test/providers/qnn/qnn_test_utils.h" -#include "onnx/onnx_pb.h" +#include "core/graph/onnx_protobuf.h" #include "gtest/gtest.h" namespace onnxruntime { diff --git a/onnxruntime/test/providers/qnn/pad_op_test.cpp b/onnxruntime/test/providers/qnn/pad_op_test.cpp index a6b8664c6c0c9..4ce6db7facc69 100644 --- a/onnxruntime/test/providers/qnn/pad_op_test.cpp +++ b/onnxruntime/test/providers/qnn/pad_op_test.cpp @@ -10,7 +10,7 @@ #include "test/optimizer/qdq_test_utils.h" #include "test/providers/qnn/qnn_test_utils.h" -#include "onnx/onnx_pb.h" +#include "core/graph/onnx_protobuf.h" #include "gtest/gtest.h" diff --git a/onnxruntime/test/providers/qnn/pool_op_test.cpp b/onnxruntime/test/providers/qnn/pool_op_test.cpp index 5dd3a6aaa3620..f0ca3557191c7 100644 --- a/onnxruntime/test/providers/qnn/pool_op_test.cpp +++ b/onnxruntime/test/providers/qnn/pool_op_test.cpp @@ -10,7 +10,7 @@ #include "test/optimizer/qdq_test_utils.h" #include "test/providers/qnn/qnn_test_utils.h" -#include "onnx/onnx_pb.h" +#include "core/graph/onnx_protobuf.h" #include "gtest/gtest.h" diff --git a/onnxruntime/test/providers/qnn/qnn_basic_test.cc b/onnxruntime/test/providers/qnn/qnn_basic_test.cc index e8282dbad9f72..a6fb66472844a 100644 --- a/onnxruntime/test/providers/qnn/qnn_basic_test.cc +++ b/onnxruntime/test/providers/qnn/qnn_basic_test.cc @@ -5,11 +5,14 @@ #include #include +#include "core/providers/cpu/cpu_provider_factory.h" // For OrtSessionOptionsAppendExecutionProvider_CPU +#if BUILD_QNN_EP_STATIC_LIB +#include 
"core/providers/qnn/qnn_allocator.h" // Used by QnnHTPBackendTests.UseHtpSharedMemoryAllocatorForInputs +#endif +#include "core/session/inference_session.h" #include "core/session/onnxruntime_cxx_api.h" #include "core/session/onnxruntime_session_options_config_keys.h" #include "core/session/onnxruntime_run_options_config_keys.h" -#include "core/providers/cpu/cpu_provider_factory.h" // For OrtSessionOptionsAppendExecutionProvider_CPU -#include "core/session/inference_session.h" #include "test/providers/qnn/qnn_test_utils.h" @@ -1098,6 +1101,57 @@ TEST_F(QnnHTPBackendTests, EPOffloadsGraphIOQuantDequant) { } } +// Only compile this test when QNN EP is built as a static library. When QNN EP is a shared library, +// we cannot include internal QNN EP headers that use the provider-bridge API. +#if BUILD_QNN_EP_STATIC_LIB +TEST_F(QnnHTPBackendTests, UseHtpSharedMemoryAllocatorForInputs) { + ProviderOptions provider_options; +#if defined(_WIN32) + provider_options["backend_path"] = "QnnHtp.dll"; +#else + provider_options["backend_path"] = "libQnnHtp.so"; +#endif + provider_options["enable_htp_shared_memory_allocator"] = "1"; + + std::unique_ptr qnn_ep; + try { + qnn_ep = QnnExecutionProviderWithOptions(provider_options); + } catch (const OnnxRuntimeException& e) { + // handle particular exception that indicates that the libcdsprpc.so / dll can't be loaded + // NOTE: To run this on a local Windows ARM64 device, you need to copy libcdsprpc.dll to the build directory: + // - Open File Explorer + // - Go to C:/Windows/System32/DriverStore/FileRepository/ + // - Search for a folder that begins with qcnspmcdm8380.inf_arm64_ and open it + // - Copy the libcdsprpc.dll into the build/[PATH CONTAINING onnxruntime.dll] directory of the application. + // TODO(adrianlizarraga): Update CMake build for unittests to automatically copy libcdsprpc.dll into build directory +#if defined(_WIN32) + constexpr const char* expected_error_message = "Failed to load libcdsprpc.dll"; +#else + constexpr const char* expected_error_message = "Failed to load libcdsprpc.so"; +#endif + ASSERT_THAT(e.what(), testing::HasSubstr(expected_error_message)); + GTEST_SKIP() << "HTP shared memory allocator is unavailable."; + } + + AllocatorPtr htp_shared_memory_allocator{}; + { + auto allocators = qnn_ep->CreatePreferredAllocators(); + ASSERT_FALSE(allocators.empty()); + auto& allocator = allocators[0]; + ASSERT_EQ(allocator->Info(), qnn::HtpSharedMemoryAllocator::AssociatedMemoryInfo()); + htp_shared_memory_allocator = std::move(allocator); + } + + auto input_defs = {TestInputDef({1, 2, 2, 2}, false, -10.0f, 10.0f), + TestInputDef({1, 2, 2, 2}, false, -10.0f, 10.0f)}; + RunQnnModelTest(BuildOpTestCase("Add", input_defs, {}, {}, kOnnxDomain, htp_shared_memory_allocator), + provider_options, + 13, + ExpectedEPNodeAssignment::All, + 0.008f); +} +#endif // BUILD_QNN_EP_STATIC_LIB + #endif // defined(__aarch64__) || defined(_M_ARM64) || defined(__linux__) #endif // !defined(ORT_MINIMAL_BUILD) diff --git a/onnxruntime/test/providers/qnn/qnn_ep_context_test.cc b/onnxruntime/test/providers/qnn/qnn_ep_context_test.cc index a3f0ed55b83f2..38fde332ca992 100644 --- a/onnxruntime/test/providers/qnn/qnn_ep_context_test.cc +++ b/onnxruntime/test/providers/qnn/qnn_ep_context_test.cc @@ -7,7 +7,6 @@ #include "core/session/onnxruntime_cxx_api.h" #include "core/session/onnxruntime_session_options_config_keys.h" #include "core/session/inference_session.h" -#include "core/providers/shared/utils/utils.h" #include "test/providers/qnn/qnn_test_utils.h" @@ -25,6 
+24,24 @@ namespace test { #if defined(__aarch64__) || defined(_M_ARM64) || defined(__linux__) +static int64_t GetNodeAttr(const Node& node, const std::string& attr_name, int64_t default_val) { + const auto& attributes = node.GetAttributes(); + if (auto entry = attributes.find(attr_name); entry != attributes.end()) { + return entry->second.i(); + } + + return default_val; +} + +static const std::string& GetNodeAttr(const Node& node, const std::string& attr_name, const std::string& default_val) { + const auto& attributes = node.GetAttributes(); + if (auto entry = attributes.find(attr_name); entry != attributes.end()) { + return entry->second.s(); + } + + return default_val; +} + // Create a model with FusedMatMul + Add (quantized) // input1 -> Add -> Q -> DQ \ // FusedMatMul -> Q -> DQ -> output @@ -873,10 +890,9 @@ static void GetLastContextBinaryFileName(const std::string last_onnx_ctx_file, auto& ctx_graph = ctx_model->MainGraph(); for (auto& node : ctx_graph.Nodes()) { if (node.OpType() == "EPContext") { - NodeAttrHelper node_helper(node); - int64_t is_main_context = node_helper.Get("main_context", static_cast(0)); + int64_t is_main_context = GetNodeAttr(node, "main_context", static_cast(0)); if (1 == is_main_context) { - last_ctx_bin_file = node_helper.Get("ep_cache_context", ""); + last_ctx_bin_file = GetNodeAttr(node, "ep_cache_context", ""); return; } } @@ -899,10 +915,9 @@ static void UpdateEpContextModel(const std::vector& ep_ctx_files, for (auto& node : ctx_graph.Nodes()) { if (node.OpType() == "EPContext") { - NodeAttrHelper node_helper(node); - int64_t is_main_context = node_helper.Get("main_context", static_cast(0)); + int64_t is_main_context = GetNodeAttr(node, "main_context", static_cast(0)); if (1 == is_main_context) { - std::string old_qnn_ctx_binary_file_name = node_helper.Get("ep_cache_context", ""); + std::string old_qnn_ctx_binary_file_name = GetNodeAttr(node, "ep_cache_context", ""); auto file_path = path.replace_filename(old_qnn_ctx_binary_file_name); std::remove(file_path.string().c_str()); node.ClearAttribute("ep_cache_context"); diff --git a/onnxruntime/test/providers/qnn/qnn_test_utils.h b/onnxruntime/test/providers/qnn/qnn_test_utils.h index a8670252ff9e0..676460e108b0e 100644 --- a/onnxruntime/test/providers/qnn/qnn_test_utils.h +++ b/onnxruntime/test/providers/qnn/qnn_test_utils.h @@ -901,10 +901,12 @@ inline void TestFp16ModelAccuracy(const GetTestModelFn& f32_model_fn, * * \param builder Model builder object used to build the model's inputs, outputs, and nodes. * \param input_def Input definition that describes what kind of input to create. + * \param allocator Optional allocator to use to allocate the input ORT value. * \return A pointer to the new input. 
 */
template <typename InputType>
-inline NodeArg* MakeTestInput(ModelTestBuilder& builder, const TestInputDef<InputType>& input_def) {
+inline NodeArg* MakeTestInput(ModelTestBuilder& builder, const TestInputDef<InputType>& input_def,
+                              AllocatorPtr allocator = nullptr) {
 NodeArg* input = nullptr;
 const auto& shape = input_def.GetShape();
 const bool is_initializer = input_def.IsInitializer();
@@ -915,7 +917,7 @@ inline NodeArg* MakeTestInput(ModelTestBuilder& builder, const TestInputDef<InputType>&
 if (is_initializer) {
 input = builder.MakeInitializer<InputType>(shape, raw_data);
 } else {
-      input = builder.MakeInput<InputType>(shape, raw_data);
+      input = builder.MakeInput<InputType>(shape, raw_data, allocator);
 }
 } else {  // Random data
 const auto& rand_info = input_def.GetRandomDataInfo();
@@ -923,7 +925,7 @@ inline NodeArg* MakeTestInput(ModelTestBuilder& builder, const TestInputDef<InputType>&
 if (is_initializer) {
 input = builder.MakeInitializer<InputType>(shape, rand_info.min, rand_info.max);
 } else {
-      input = builder.MakeInput<InputType>(shape, rand_info.min, rand_info.max);
+      input = builder.MakeInput<InputType>(shape, rand_info.min, rand_info.max, allocator);
 }
 }
@@ -931,7 +933,8 @@ inline NodeArg* MakeTestInput(ModelTestBuilder& builder, const TestInputDef<InputType>&
 }
 template <>
-inline NodeArg* MakeTestInput(ModelTestBuilder& builder, const TestInputDef<bool>& input_def) {
+inline NodeArg* MakeTestInput(ModelTestBuilder& builder, const TestInputDef<bool>& input_def,
+                              AllocatorPtr allocator) {
 NodeArg* input = nullptr;
 const auto& shape = input_def.GetShape();
 const bool is_initializer = input_def.IsInitializer();
@@ -942,13 +945,13 @@ inline NodeArg* MakeTestInput(ModelTestBuilder& builder, const TestInputDef<bool>&
 if (input_def.IsRawData()) {  // Raw data.
 const std::vector<bool>& raw_data = input_def.GetRawData();
 if (is_initializer) {
 input = builder.MakeInitializerBool(shape, raw_data);
 } else {
-      input = builder.MakeInput<bool>(shape, raw_data);
+      input = builder.MakeInput<bool>(shape, raw_data, allocator);
 }
 } else {  // Random data
 if (is_initializer) {
 input = builder.MakeRandInitializerBool(shape);
 } else {
-      input = builder.MakeInputBool(shape);
+      input = builder.MakeInputBool(shape, allocator);
 }
 }
@@ -973,6 +976,7 @@ NodeArg* MakeTestQDQBiasInput(ModelTestBuilder& builder, const TestInputDef<float>&
@@ -980,18 +984,19 @@ inline GetTestModelFn BuildOpTestCase(const std::string& op_type,
 const std::vector<TestInputDef<InputType1>>& input_defs_1,
 const std::vector<TestInputDef<InputType2>>& input_defs_2,
 const std::vector<ONNX_NAMESPACE::AttributeProto>& attrs,
-                                      const std::string& op_domain = kOnnxDomain) {
-  return [op_type, input_defs_1, input_defs_2, attrs, op_domain](ModelTestBuilder& builder) {
+                                      const std::string& op_domain = kOnnxDomain,
+                                      AllocatorPtr input_allocator = nullptr) {
+  return [op_type, input_defs_1, input_defs_2, attrs, op_domain, input_allocator](ModelTestBuilder& builder) {
 std::vector<NodeArg*> op_inputs;
 op_inputs.reserve(input_defs_1.size() + input_defs_2.size());
 for (const auto& input_def : input_defs_1) {
-      NodeArg* input = MakeTestInput(builder, input_def);
+      NodeArg* input = MakeTestInput(builder, input_def, input_allocator);
 op_inputs.push_back(input);
 }
 for (const auto& input_def : input_defs_2) {
-      NodeArg* input = MakeTestInput(builder, input_def);
+      NodeArg* input = MakeTestInput(builder, input_def, input_allocator);
 op_inputs.push_back(input);
 }
@@ -1012,6 +1017,8 @@ inline GetTestModelFn BuildOpTestCase(const std::string& op_type,
 * \param input_defs List of input definitions.
 * \param attrs List of operator attributes.
 * \param op_domain The operator's domain. Defaults to the ONNX domain (i.e., "").
+ * \param use_contrib_qdq Whether to use Q/DQ ops from the MS domain instead of the ONNX domain.
+ * \param input_allocator Optional allocator to use to allocate input ORT values.
 * \returns A model building function.
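 *
 * For example, a sketch of threading an allocator through a QDQ op test (operator, shapes, and
 * data ranges here are hypothetical):
 * \code
 *   auto qdq_model_fn = BuildQDQOpTestCase<uint8_t>(
 *       "Add", {TestInputDef<float>({1, 2, 2, 2}, false, -10.0f, 10.0f)},
 *       {}, {}, kOnnxDomain, false, input_allocator);
 * \endcode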
 */
template <typename QuantType>
@@ -1021,15 +1028,17 @@ inline GetTestQDQModelFn<QuantType> BuildQDQOpTestCase(
 const std::vector<TestInputDef<float>>& quant_input_defs,
 const std::vector<TestInputDef<float>>& non_quant_input_defs,
 const std::vector<ONNX_NAMESPACE::AttributeProto>& attrs,
 const std::string& op_domain = kOnnxDomain,
-    bool use_contrib_qdq = false) {
+    bool use_contrib_qdq = false,
+    AllocatorPtr input_allocator = nullptr) {
 return [op_type, quant_input_defs, non_quant_input_defs, attrs, op_domain,
-          use_contrib_qdq](ModelTestBuilder& builder, std::vector<QuantParams<QuantType>>& output_qparams) {
+          use_contrib_qdq, input_allocator](
+             ModelTestBuilder& builder, std::vector<QuantParams<QuantType>>& output_qparams) {
 std::vector<NodeArg*> op_inputs;
 op_inputs.reserve(quant_input_defs.size() + non_quant_input_defs.size());
 // Create QDQ inputs
 for (const auto& input_def : quant_input_defs) {
-      NodeArg* input = MakeTestInput(builder, input_def);
+      NodeArg* input = MakeTestInput(builder, input_def, input_allocator);
 QuantParams<QuantType> input_qparams = GetTestInputQuantParams<QuantType>(input_def);
 NodeArg* input_after_qdq = AddQDQNodePair<QuantType>(builder, input, input_qparams.scale,
 input_qparams.zero_point, use_contrib_qdq);
@@ -1038,7 +1047,7 @@ inline GetTestQDQModelFn<QuantType> BuildQDQOpTestCase(
 // Create non-QDQ inputs
 for (const auto& input_def : non_quant_input_defs) {
-      NodeArg* input = MakeTestInput(builder, input_def);
+      NodeArg* input = MakeTestInput(builder, input_def, input_allocator);
 op_inputs.push_back(input);
 }
diff --git a/onnxruntime/test/providers/qnn/reduce_op_test.cc b/onnxruntime/test/providers/qnn/reduce_op_test.cc
index e4abe85908373..b7b1960ab2c8a 100644
--- a/onnxruntime/test/providers/qnn/reduce_op_test.cc
+++ b/onnxruntime/test/providers/qnn/reduce_op_test.cc
@@ -336,7 +336,12 @@ TEST_F(QnnCPUBackendTests, ReduceL2Opset13) {
 // HTP backend with FP16 precision, and that the inference results match the CPU EP results.
 //
 // Failed QNN op validation because of 5D input.
It runs OK if the op validation is bypassed.
+// Issue fixed in 2.30
+#if (QNN_API_VERSION_MAJOR == 2) && (QNN_API_VERSION_MINOR >= 23)
+TEST_F(QnnHTPBackendTests, ReduceSumOpset11_5D_FP16) {
+#else
 TEST_F(QnnHTPBackendTests, DISABLED_ReduceSumOpset11_5D_FP16) {
+#endif
 float fp32_abs_err = 3e-2f;
 bool enable_fp16 = true;
 RunReduceTest<float>("ReduceSum",
diff --git a/onnxruntime/test/providers/qnn/reshape_expand_op_test.cc b/onnxruntime/test/providers/qnn/reshape_expand_op_test.cc
index 3964edc11461b..b66547a939983 100644
--- a/onnxruntime/test/providers/qnn/reshape_expand_op_test.cc
+++ b/onnxruntime/test/providers/qnn/reshape_expand_op_test.cc
@@ -8,7 +8,7 @@
 #include "test/providers/qnn/qnn_test_utils.h"
 #include "core/graph/node_attr_utils.h"
-#include "onnx/onnx_pb.h"
+#include "core/graph/onnx_protobuf.h"
 #include "gtest/gtest.h"
 namespace onnxruntime {
diff --git a/onnxruntime/test/providers/qnn/resize_test.cc b/onnxruntime/test/providers/qnn/resize_test.cc
index 15612e3267a75..651f55bc05d3f 100644
--- a/onnxruntime/test/providers/qnn/resize_test.cc
+++ b/onnxruntime/test/providers/qnn/resize_test.cc
@@ -9,7 +9,7 @@
 #include "test/optimizer/qdq_test_utils.h"
 #include "test/providers/qnn/qnn_test_utils.h"
-#include "onnx/onnx_pb.h"
+#include "core/graph/onnx_protobuf.h"
 #include "gtest/gtest.h"
diff --git a/onnxruntime/test/providers/qnn/simple_op_htp_test.cc b/onnxruntime/test/providers/qnn/simple_op_htp_test.cc
index 7541d94bac0c6..83a02a61b172d 100644
--- a/onnxruntime/test/providers/qnn/simple_op_htp_test.cc
+++ b/onnxruntime/test/providers/qnn/simple_op_htp_test.cc
@@ -230,7 +230,7 @@ TEST_F(QnnHTPBackendTests, UnaryOp_Tanh) {
 }
 // disabled for QNN 2.28.0.241029 backendValidateOpConfig failed
-// still fails on QNN 2.28.2.
+// still fails on QNN 2.28.2 and QNN 2.30.0
 // QnnDsp [4294967295] has incorrect Value -32768, expected equal to 0.
 // QnnDsp validateNativeOps node_token_6:qti.aisw:Tanh htp op validator failed 3110
 // QnnDsp registered validator failed => 3110
@@ -238,9 +238,11 @@ TEST_F(QnnHTPBackendTests, UnaryOp_Tanh) {
 // QnnDsp Wake up free backend (id: 1)'s thread(s)
 // QnnDsp Failed to validate op node_token_6 with error 0xc26
 // Tests accuracy of 16-bit QDQ Tanh.
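// A minimal sketch of the version-gating idiom used for several of the re-enabled tests in this
// change: the test body is always compiled, and only the DISABLED_ name prefix is switched
// depending on the QNN API version (the "Issue fixed in 2.30" comments are paired with a
// (QNN_API_VERSION_MINOR >= 23) check, suggesting QNN SDK 2.30 reports API version 2.23):
//
//   #if (QNN_API_VERSION_MAJOR == 2) && (QNN_API_VERSION_MINOR >= 23)
//   TEST_F(QnnHTPBackendTests, SomeFixedTest) {           // runs on fixed SDKs
//   #else
//   TEST_F(QnnHTPBackendTests, DISABLED_SomeFixedTest) {  // kept visible but skipped on older SDKs
//   #endif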
-TEST_F(QnnHTPBackendTests, DISABLED_UnaryOp_Tanh_U16) {
+//
+// We now skip QNN validation as a workaround for QNN SDK 2.28.0 to 2.30.0
+TEST_F(QnnHTPBackendTests, UnaryOp_Tanh_U16) {
 RunQDQOpTest<uint16_t>("Tanh",
-               {TestInputDef<float>({1, 2, 3}, false, GetFloatDataInRange(-10.0f, 10.0f, 6))},
+               {TestInputDef<float>({1, 2, 64}, false, GetFloatDataInRange(-10.0f, 10.0f, 128))},
 {},
 13,
 ExpectedEPNodeAssignment::All,
@@ -665,7 +667,12 @@ TEST_F(QnnHTPBackendTests, UnaryOp_Ceil) {
 // CPU EP f32 model output: [-12.0, -7.0, -2.0, 3.0, 8.0, 12.0]
 // CPU EP qdq model output: [-12.0, -6.99, -1.99, 3.0, 8.0, 11.99]
 // QNN EP qdq model output: [-11.0 (WRONG), -7.0, -2.0, 2.99, 8.0, 11.99]
+// Issue fixed in 2.30
+#if (QNN_API_VERSION_MAJOR == 2) && (QNN_API_VERSION_MINOR >= 23)
+TEST_F(QnnHTPBackendTests, UnaryOp_Ceil_U16) {
+#else
 TEST_F(QnnHTPBackendTests, DISABLED_UnaryOp_Ceil_U16) {
+#endif
 const std::vector<float> input_data = GetFloatDataInRange(-12.0f, 12.0f, 6);
 RunQDQOpTest<uint16_t>("Ceil",
 {TestInputDef<float>({1, 2, 3}, false, input_data)},
@@ -1070,7 +1077,12 @@ TEST_F(QnnHTPBackendTests, GridSample_U16_AlignCorners) {
 // Expected val: 3.3620510101318359
 // QNN QDQ val: 3.2922921180725098 (err 0.069758892059326172)
 // CPU QDQ val: 3.3850328922271729 (err 0.022981882095336914)
+// Issue fixed in 2.30
+#if (QNN_API_VERSION_MAJOR == 2) && (QNN_API_VERSION_MINOR >= 23)
+TEST_F(QnnHTPBackendTests, GridSample_BorderPadding) {
+#else
 TEST_F(QnnHTPBackendTests, DISABLED_GridSample_BorderPadding) {
+#endif
 RunQDQOpTest<uint8_t>("GridSample",
 {TestInputDef<float>({1, 1, 3, 2}, false, -10.0f, 10.0f),
 TestInputDef<float>({1, 2, 4, 2}, false, -10.0f, 10.0f)},
diff --git a/onnxruntime/test/providers/qnn/split_op_test.cc b/onnxruntime/test/providers/qnn/split_op_test.cc
index 6dc721edb421e..23682f7e934c3 100644
--- a/onnxruntime/test/providers/qnn/split_op_test.cc
+++ b/onnxruntime/test/providers/qnn/split_op_test.cc
@@ -7,7 +7,7 @@
 #include "test/providers/qnn/qnn_test_utils.h"
-#include "onnx/onnx_pb.h"
+#include "core/graph/onnx_protobuf.h"
 #include "gtest/gtest.h"
 namespace onnxruntime {
diff --git a/onnxruntime/test/providers/qnn/squeeze_unsqueeze_op_test.cc b/onnxruntime/test/providers/qnn/squeeze_unsqueeze_op_test.cc
index 33d2f64c0315e..abc1b3a89d85c 100644
--- a/onnxruntime/test/providers/qnn/squeeze_unsqueeze_op_test.cc
+++ b/onnxruntime/test/providers/qnn/squeeze_unsqueeze_op_test.cc
@@ -7,7 +7,7 @@
 #include "test/providers/qnn/qnn_test_utils.h"
-#include "onnx/onnx_pb.h"
+#include "core/graph/onnx_protobuf.h"
 #include "gtest/gtest.h"
 namespace onnxruntime {
diff --git a/onnxruntime/test/providers/qnn/tile_op_test.cc b/onnxruntime/test/providers/qnn/tile_op_test.cc
index 2b35c730ee5fe..85541efe5646c 100644
--- a/onnxruntime/test/providers/qnn/tile_op_test.cc
+++ b/onnxruntime/test/providers/qnn/tile_op_test.cc
@@ -8,7 +8,7 @@
 #include "test/providers/qnn/qnn_test_utils.h"
 #include "core/graph/node_attr_utils.h"
-#include "onnx/onnx_pb.h"
+#include "core/graph/onnx_protobuf.h"
 #include "gtest/gtest.h"
 namespace onnxruntime {
diff --git a/onnxruntime/test/providers/qnn/topk_op_test.cc b/onnxruntime/test/providers/qnn/topk_op_test.cc
index 5a9351b9366ec..354a5d1e3b49a 100644
--- a/onnxruntime/test/providers/qnn/topk_op_test.cc
+++ b/onnxruntime/test/providers/qnn/topk_op_test.cc
@@ -8,7 +8,7 @@
 #include "test/providers/qnn/qnn_test_utils.h"
 #include "core/graph/node_attr_utils.h"
-#include "onnx/onnx_pb.h"
+#include "core/graph/onnx_protobuf.h"
 #include "gtest/gtest.h"
 namespace onnxruntime {
diff --git
a/onnxruntime/test/providers/xnnpack/xnnpack_basic_test.cc b/onnxruntime/test/providers/xnnpack/xnnpack_basic_test.cc
index 9f21b64681739..85ca7c1ed328e 100644
--- a/onnxruntime/test/providers/xnnpack/xnnpack_basic_test.cc
+++ b/onnxruntime/test/providers/xnnpack/xnnpack_basic_test.cc
@@ -266,7 +266,7 @@ TEST(XnnpackEP, TestQDQConvS8S8_per_channel) {
 RunModelTestWithPath(ort_model_path, "xnnpack_qdq_test_graph_conv_s8s8_perchannel", graph_verify, 0.2f);
 }
-TEST(XnnpackEP, DISABLED_TestAveragePool) {  // [ONNXRuntimeError] : 9 : NOT_IMPLEMENTED : Could not find an implementation for AveragePool(19) node with name 'node'
+TEST(XnnpackEP, TestAveragePool) {
 const std::vector<int64_t> input_shape = {1, 2, 3, 3};
 auto modelBuilder = [&input_shape](ModelTestBuilder& builder) {
 auto* input_arg = builder.MakeInput<float>(input_shape, -1.f, 1.f);
@@ -295,7 +295,7 @@ TEST(XnnpackEP, DISABLED_TestQDQAveragePool) {  // [ONNXRuntimeError] : 9 : NOT
 });
 }
-TEST(XnnpackEP, DISABLED_TestMaxPool) {  // NOT_IMPLEMENTED : Could not find an implementation for MaxPool(22) node with name 'node'
+TEST(XnnpackEP, TestMaxPool) {
 const std::vector<int64_t> input_shape = {1, 2, 13, 13};
 auto modelBuilder = [&input_shape](ModelTestBuilder& builder) {
 auto* input_arg = builder.MakeInput<float>(input_shape, -1.f, 1.f);
@@ -395,7 +395,7 @@ TEST(XnnpackEP, TestConvTranspose) {
 RunModelTestWithPath(ort_model_path, "test_conv_follow_convtrans", nullptr);
 }
-TEST(XnnpackEP, DISABLED_TestConvTranspose_With_Outputpadding) {  // NOT_IMPLEMENTED : Could not find an implementation for ConvTranspose(22) node with name 'node'
+TEST(XnnpackEP, TestConvTranspose_With_Outputpadding) {
 const std::vector<int64_t> input_shape = {1, 4, 15, 15};
 auto modelBuilder = [&input_shape](ModelTestBuilder& builder) {
 auto* input_arg = builder.MakeInput<float>(input_shape, -127.f, 127.f);
@@ -415,7 +415,7 @@ TEST(XnnpackEP, DISABLED_TestConvTranspose_With_Outputpadding) {  // NOT_IMPLEME
 });
 }
-TEST(XnnpackEP, DISABLED_TestConvTranspose_With_OutputShape) {  // NOT_IMPLEMENTED : Could not find an implementation for ConvTranspose(22) node with name 'node'
+TEST(XnnpackEP, TestConvTranspose_With_OutputShape) {
 const std::vector<int64_t> input_shape = {1, 4, 15, 15};
 auto modelBuilder = [&input_shape](ModelTestBuilder& builder) {
 auto* input_arg = builder.MakeInput<float>(input_shape, -127.f, 127.f);
diff --git a/onnxruntime/test/python/contrib_ops/onnx_contrib_ops_helper.py b/onnxruntime/test/python/contrib_ops/onnx_contrib_ops_helper.py
index dd5d5cc90e0bf..1459dfc61c84c 100644
--- a/onnxruntime/test/python/contrib_ops/onnx_contrib_ops_helper.py
+++ b/onnxruntime/test/python/contrib_ops/onnx_contrib_ops_helper.py
@@ -65,11 +65,11 @@ def expect(
 del kwargs["output_types"]
 inputs_vi = [
 _extract_value_info(arr, arr_name, input_type)
-        for arr, arr_name, input_type in zip(inputs, present_inputs, input_types)
+        for arr, arr_name, input_type in zip(inputs, present_inputs, input_types, strict=False)
 ]
 outputs_vi = [
 _extract_value_info(arr, arr_name, output_type)
-        for arr, arr_name, output_type in zip(outputs, present_outputs, output_types)
+        for arr, arr_name, output_type in zip(outputs, present_outputs, output_types, strict=False)
 ]
 graph = onnx.helper.make_graph(nodes=[node], name=name, inputs=inputs_vi, outputs=outputs_vi)
diff --git a/onnxruntime/test/python/onnx_backend_test_series.py b/onnxruntime/test/python/onnx_backend_test_series.py
index 8fc76da3495a8..23f6d3e23e9bf 100644
--- a/onnxruntime/test/python/onnx_backend_test_series.py
+++ b/onnxruntime/test/python/onnx_backend_test_series.py
@@ -9,7 +9,6 @@
 import re
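# A small illustration of the zip(..., strict=...) semantics (PEP 618, Python 3.10+) adopted
# throughout these Python test changes; the lists below are hypothetical:
#
#   a, b = [1, 2, 3], ["x", "y"]
#   list(zip(a, b))                # [(1, 'x'), (2, 'y')] - silently truncates to the shortest
#   list(zip(a, b, strict=False))  # same result, but the truncation is now explicit intent
#   list(zip(a, b, strict=True))   # raises ValueError for unequal lengths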
import sys import unittest -from typing import Dict import numpy as np import onnx @@ -28,8 +27,8 @@ class OrtBackendTest(onnx.backend.test.runner.Runner): # pylint: disable=too-few-public-methods def __init__( self, - rtol_overrides: Dict[str, float], - atol_overrides: Dict[str, float], + rtol_overrides: dict[str, float], + atol_overrides: dict[str, float], ): self._rtol_overrides = rtol_overrides self._atol_overrides = atol_overrides diff --git a/onnxruntime/test/python/onnxruntime_test_distributed.py b/onnxruntime/test/python/onnxruntime_test_distributed.py index de70478761f19..7f4f4b5bb2270 100644 --- a/onnxruntime/test/python/onnxruntime_test_distributed.py +++ b/onnxruntime/test/python/onnxruntime_test_distributed.py @@ -2,7 +2,6 @@ # Licensed under the MIT License. import unittest -from typing import Tuple import numpy as np import onnxscript @@ -23,7 +22,7 @@ def shard_tensor_per_device_mesh(X, rank, axis, device_mesh): if axis is None: return X shards = np.split(X, len(device_mesh), axis) - selected_shards = tuple(shard for device_id, shard in zip(device_mesh, shards) if device_id == rank) + selected_shards = tuple(shard for device_id, shard in zip(device_mesh, shards, strict=False) if device_id == rank) return np.concatenate(selected_shards, axis=axis) @@ -99,12 +98,12 @@ def shard_tensor_per_spec(tensor: np.ndarray, rank: int, spec: str, device_mesh: class TestDistributedReshape(unittest.TestCase): def _check_distributed_reshape( self, - shape: Tuple[int, ...], - target_shape: Tuple[int, ...], + shape: tuple[int, ...], + target_shape: tuple[int, ...], input_device_meshes: np.ndarray, - input_shard_specs: Tuple[str, ...], + input_shard_specs: tuple[str, ...], output_device_meshes: np.ndarray, - output_shard_specs: Tuple[str, ...], + output_shard_specs: tuple[str, ...], ): input_device_mesh_shapes, input_device_mesh_elements = translate_all_device_meshes(input_device_meshes) output_device_mesh_shapes, output_device_mesh_elements = translate_all_device_meshes(output_device_meshes) @@ -683,12 +682,12 @@ def test_reshape_two_axis_fusion_shape_3_7_4096_rrs_01_shape_21_4906_rs_01(self) class TestDistributedExpand(unittest.TestCase): def _check_distributed_expand( self, - shape: Tuple[int, ...], - target_shape: Tuple[int, ...], + shape: tuple[int, ...], + target_shape: tuple[int, ...], input_device_meshes: np.ndarray, - input_shard_specs: Tuple[str, ...], + input_shard_specs: tuple[str, ...], output_device_meshes: np.ndarray, - output_shard_specs: Tuple[str, ...], + output_shard_specs: tuple[str, ...], ): assert len(input_device_meshes) == len(input_shard_specs) assert len(output_device_meshes) == len(output_shard_specs) @@ -855,12 +854,12 @@ def test_expand_in_tiny_llama(self): class TestDistributedUnsqueeze(unittest.TestCase): def _check_distributed_unsqueeze( self, - shape: Tuple[int, ...], - axes: Tuple[int, ...], + shape: tuple[int, ...], + axes: tuple[int, ...], input_device_meshes: np.ndarray, - input_shard_specs: Tuple[str, ...], + input_shard_specs: tuple[str, ...], output_device_meshes: np.ndarray, - output_shard_specs: Tuple[str, ...], + output_shard_specs: tuple[str, ...], ): assert len(input_device_meshes) == len(input_shard_specs) assert len(output_device_meshes) == len(output_shard_specs) @@ -977,12 +976,12 @@ def test_unsqueeze_not_sharded(self): class TestDistributedSqueeze(unittest.TestCase): def _check_distributed_squeeze( self, - shape: Tuple[int, ...], - axes: Tuple[int, ...], + shape: tuple[int, ...], + axes: tuple[int, ...], input_device_meshes: np.ndarray, - 
input_shard_specs: Tuple[str, ...], + input_shard_specs: tuple[str, ...], output_device_meshes: np.ndarray, - output_shard_specs: Tuple[str, ...], + output_shard_specs: tuple[str, ...], ): assert len(input_device_meshes) == len(input_shard_specs) assert len(output_device_meshes) == len(output_shard_specs) @@ -1086,12 +1085,12 @@ def _check_distributed_reduce( self, keepdims: int, dtype: np.dtype, - shape: Tuple[int, ...], - axes: Tuple[int, ...], + shape: tuple[int, ...], + axes: tuple[int, ...], input_device_meshes: np.ndarray, - input_shard_specs: Tuple[str, ...], + input_shard_specs: tuple[str, ...], output_device_meshes: np.ndarray, - output_shard_specs: Tuple[str, ...], + output_shard_specs: tuple[str, ...], ): assert len(input_device_meshes) == len(input_shard_specs) assert len(output_device_meshes) == len(output_shard_specs) @@ -1146,6 +1145,7 @@ def distributed_reduce_mean_instance(data_tensor: FLOAT, axes_tensor: INT64): for onnx_func, np_func in zip( [distributed_reduce_sum_instance, distributed_reduce_max_instance, distributed_reduce_mean_instance], [np.sum, np.maximum.reduce, np.mean], + strict=False, ): data = np.random.randint(4, size=shape).astype(dtype) expected = np_func(data, axis=axes, keepdims=bool(keepdims)) diff --git a/onnxruntime/test/python/onnxruntime_test_float8.py b/onnxruntime/test/python/onnxruntime_test_float8.py index bb63ea234498f..71efcefc12d76 100644 --- a/onnxruntime/test/python/onnxruntime_test_float8.py +++ b/onnxruntime/test/python/onnxruntime_test_float8.py @@ -23,7 +23,7 @@ if platform.system() == "Windows" and sys.version_info[:2] >= (3, 8): os.add_dll_directory(os.getcwd()) -available_providers = [provider for provider in onnxruntime.get_available_providers()] +available_providers = list(onnxruntime.get_available_providers()) class TestInferenceSession(unittest.TestCase): @@ -354,8 +354,7 @@ def test_model_cast_cast_cuda(self, name: str, float_name: str, saturate: int, p assert_allclose(expect, y) except AssertionError as e: raise AssertionError( - f"Discrepancies with name={name}, float_name={float_name}, " - f"saturate={saturate}\nexpect={expect}\ny={y}" + f"Discrepancies with name={name}, float_name={float_name}, saturate={saturate}\nexpect={expect}\ny={y}" ) from e self.assertEqual(expect.shape, y.shape) self.assertEqual(expect.dtype, y.dtype) @@ -394,8 +393,7 @@ def test_model_cast_cast_cuda_ortvalue(self, name: str, float_name: str, saturat assert_allclose(expect, y) except AssertionError as e: raise AssertionError( - f"Discrepancies with name={name}, float_name={float_name}, " - f"saturate={saturate}\nexpect={expect}\ny={y}" + f"Discrepancies with name={name}, float_name={float_name}, saturate={saturate}\nexpect={expect}\ny={y}" ) from e self.assertEqual(expect.shape, y.shape) self.assertEqual(expect.dtype, y.dtype) @@ -608,8 +606,7 @@ def test_model_cast_like_x2_cpu(self, name: str, float_name: str, saturate: int) if not saturate: return raise AssertionError( - f"Discrepancies with name={name}, float_name={float_name}, " - f"saturate={saturate}\nexpect={expect}\ny={y}" + f"Discrepancies with name={name}, float_name={float_name}, saturate={saturate}\nexpect={expect}\ny={y}" ) from e self.assertEqual(expect.shape, y.shape) self.assertEqual(expect.dtype, y.dtype) diff --git a/onnxruntime/test/python/onnxruntime_test_float8_gemm8.py b/onnxruntime/test/python/onnxruntime_test_float8_gemm8.py index 2dba8ff532a0a..bb65533c3d1e0 100644 --- a/onnxruntime/test/python/onnxruntime_test_float8_gemm8.py +++ 
b/onnxruntime/test/python/onnxruntime_test_float8_gemm8.py @@ -20,7 +20,7 @@ from onnxruntime import InferenceSession, get_available_providers -available_providers = [provider for provider in get_available_providers()] +available_providers = list(get_available_providers()) class TestFloat8Gemm8(unittest.TestCase): @@ -72,19 +72,19 @@ def get_model_gemm( if use_f8: assert domain == "com.microsoft" inits.append(from_array(np.array([1], dtype=np.float32), name="one")) - kwargs = dict( - domain=domain, - dtype=dtype, - ) + kwargs = { + "domain": domain, + "dtype": dtype, + } if activation is not None: kwargs["activation"] = activation op_name = "GemmFloat8" elif domain == "com.microsoft": op_name = "GemmFloat8" - kwargs = dict( - domain=domain, - dtype=dtype, - ) + kwargs = { + "domain": domain, + "dtype": dtype, + } else: op_name = "Gemm" nodes = [ @@ -173,16 +173,16 @@ def check(f): raise AssertionError( f"Gemm ERROR len(inputs)={len(feeds)}" - f"\na@b=\n{check(lambda:a@b)}" - f"\na.T@b=\n{check(lambda:a.T@b)}" - f"\na@b.T=\n{check(lambda:a@b.T)}" - f"\na.T@b.T=\n{check(lambda:a.T@b.T)}" - f"\n----\nb@a=\n{check(lambda:b@a)}" - f"\nb.T@a=\n{check(lambda:b.T@a)}" - f"\nb@a.T=\n{check(lambda:b@a.T)}" - f"\nb.T@a.T=\n{check(lambda:b.T@a.T)}" - f"\n----\nexpected=\n{expected[:2,:2]}" - f"\n----\ngot=\n{y[:2,:2]}" + f"\na@b=\n{check(lambda: a @ b)}" + f"\na.T@b=\n{check(lambda: a.T @ b)}" + f"\na@b.T=\n{check(lambda: a @ b.T)}" + f"\na.T@b.T=\n{check(lambda: a.T @ b.T)}" + f"\n----\nb@a=\n{check(lambda: b @ a)}" + f"\nb.T@a=\n{check(lambda: b.T @ a)}" + f"\nb@a.T=\n{check(lambda: b @ a.T)}" + f"\nb.T@a.T=\n{check(lambda: b.T @ a.T)}" + f"\n----\nexpected=\n{expected[:2, :2]}" + f"\n----\ngot=\n{y[:2, :2]}" f"\nkwargs={kwargs}" ) from e @@ -225,16 +225,16 @@ def check(f): raise AssertionError( f"Gemm ERROR len(inputs)={len(feeds)}" - f"\na@b=\n{check(lambda:a@b)}" - f"\na.T@b=\n{check(lambda:a.T@b)}" - f"\na@b.T=\n{check(lambda:a@b.T)}" - f"\na.T@b.T=\n{check(lambda:a.T@b.T)}" - f"\n----\nb@a=\n{check(lambda:b@a)}" - f"\nb.T@a=\n{check(lambda:b.T@a)}" - f"\nb@a.T=\n{check(lambda:b@a.T)}" - f"\nb.T@a.T=\n{check(lambda:b.T@a.T)}" - f"\n----\nexpected=\n{expected[:2,:2]}" - f"\n----\ngot=\n{y[:2,:2]}" + f"\na@b=\n{check(lambda: a @ b)}" + f"\na.T@b=\n{check(lambda: a.T @ b)}" + f"\na@b.T=\n{check(lambda: a @ b.T)}" + f"\na.T@b.T=\n{check(lambda: a.T @ b.T)}" + f"\n----\nb@a=\n{check(lambda: b @ a)}" + f"\nb.T@a=\n{check(lambda: b.T @ a)}" + f"\nb@a.T=\n{check(lambda: b @ a.T)}" + f"\nb.T@a.T=\n{check(lambda: b.T @ a.T)}" + f"\n----\nexpected=\n{expected[:2, :2]}" + f"\n----\ngot=\n{y[:2, :2]}" f"\nkwargs={kwargs}" ) from e self.assertEqual(expected.shape, y.shape) diff --git a/onnxruntime/test/python/onnxruntime_test_python.py b/onnxruntime/test/python/onnxruntime_test_python.py index 9b26944629aa6..8aaa0aa02df79 100644 --- a/onnxruntime/test/python/onnxruntime_test_python.py +++ b/onnxruntime/test/python/onnxruntime_test_python.py @@ -23,7 +23,7 @@ if platform.system() == "Windows" and sys.version_info.major >= 3 and sys.version_info.minor >= 8: # noqa: YTT204 os.add_dll_directory(os.getcwd()) -available_providers = [provider for provider in onnxrt.get_available_providers()] +available_providers = list(onnxrt.get_available_providers()) # TVM EP doesn't support: # * calling Run() on different threads using the same session object @@ -85,7 +85,7 @@ def cuda_device_count(self, cuda_lib): if result != 0: error_str = ctypes.c_char_p() cuda_lib.cuGetErrorString(result, ctypes.byref(error_str)) - 
print("cuDeviceGetCount failed with error code %d: %s" % (result, error_str.value.decode())) + print(f"cuDeviceGetCount failed with error code {result}: {error_str.value.decode()}") return -1 return num_device.value diff --git a/onnxruntime/test/python/onnxruntime_test_python_cudagraph.py b/onnxruntime/test/python/onnxruntime_test_python_cudagraph.py index ce04dff2aecb0..5ab2fe8939f6a 100644 --- a/onnxruntime/test/python/onnxruntime_test_python_cudagraph.py +++ b/onnxruntime/test/python/onnxruntime_test_python_cudagraph.py @@ -2,7 +2,6 @@ # Licensed under the MIT License. import unittest -from typing import Dict, List import numpy as np from helper import get_name @@ -14,7 +13,7 @@ class CudaGraphHelper: def __init__( self, ort_session: onnxrt.InferenceSession, - input_and_output_shape: Dict[str, List[int]], + input_and_output_shape: dict[str, list[int]], device_id: int = 0, ): self.input_names = [input.name for input in ort_session.get_inputs()] @@ -52,7 +51,7 @@ def get_io_numpy_type_map(self, ort_session: onnxrt.InferenceSession): return name_to_numpy_type - def update_inputs(self, inputs: Dict[str, np.ndarray]): + def update_inputs(self, inputs: dict[str, np.ndarray]): for input_name in self.input_names: self.io_ort_value[input_name].update_inplace(inputs[input_name]) diff --git a/onnxruntime/test/python/onnxruntime_test_python_dmlgraph.py b/onnxruntime/test/python/onnxruntime_test_python_dmlgraph.py index 29292c2a777b1..033eae1cb4c8d 100644 --- a/onnxruntime/test/python/onnxruntime_test_python_dmlgraph.py +++ b/onnxruntime/test/python/onnxruntime_test_python_dmlgraph.py @@ -2,7 +2,6 @@ # Licensed under the MIT License. import unittest -from typing import Dict, List import numpy as np from helper import get_name @@ -14,7 +13,7 @@ class DmlGraphHelper: def __init__( self, ort_session: onnxrt.InferenceSession, - input_and_output_shape: Dict[str, List[int]], + input_and_output_shape: dict[str, list[int]], device_id: int = 0, ): self.input_names = [input.name for input in ort_session.get_inputs()] @@ -52,7 +51,7 @@ def get_io_numpy_type_map(self, ort_session: onnxrt.InferenceSession): return name_to_numpy_type - def update_inputs(self, inputs: Dict[str, np.ndarray]): + def update_inputs(self, inputs: dict[str, np.ndarray]): for input_name in self.input_names: self.io_ort_value[input_name].update_inplace(inputs[input_name]) diff --git a/onnxruntime/test/python/onnxruntime_test_python_iobinding.py b/onnxruntime/test/python/onnxruntime_test_python_iobinding.py index 01269bc02d77c..77f9e6f5cf39c 100644 --- a/onnxruntime/test/python/onnxruntime_test_python_iobinding.py +++ b/onnxruntime/test/python/onnxruntime_test_python_iobinding.py @@ -221,9 +221,8 @@ def test_bind_onnx_types_not_supported_by_numpy(self): ) for inner_device, provider in devices: - for onnx_dtype in onnx_to_torch_type_map: + for onnx_dtype, torch_dtype in onnx_to_torch_type_map.items(): with self.subTest(onnx_dtype=onnx_dtype, inner_device=str(inner_device)): - # Create onnx graph with dynamic axes X = helper.make_tensor_value_info("X", onnx_dtype, [None]) # noqa: N806 Y = helper.make_tensor_value_info("Y", onnx_dtype, [None]) # noqa: N806 @@ -239,7 +238,6 @@ def test_bind_onnx_types_not_supported_by_numpy(self): sess = onnxrt.InferenceSession(model_def.SerializeToString(), providers=provider) - torch_dtype = onnx_to_torch_type_map[onnx_dtype] x = torch.arange(8).to(torch_dtype) y = torch.empty(8, dtype=torch_dtype) diff --git a/onnxruntime/test/python/onnxruntime_test_python_nested_control_flow_op.py 
b/onnxruntime/test/python/onnxruntime_test_python_nested_control_flow_op.py index bf354ad9f9e10..0a311245dd2b5 100644 --- a/onnxruntime/test/python/onnxruntime_test_python_nested_control_flow_op.py +++ b/onnxruntime/test/python/onnxruntime_test_python_nested_control_flow_op.py @@ -2,8 +2,8 @@ # Licensed under the MIT License. import unittest +from collections.abc import Sequence from copy import deepcopy -from typing import Optional, Sequence, Tuple import numpy as np from onnx import ModelProto, NodeProto, TensorProto, ValueInfoProto, checker, helper @@ -31,7 +31,7 @@ def make_optional_tensor_value_info(name: str, elem_type: int, shape: Sequence[i return vi -def make_optional_vi(vi: ValueInfoProto, name: Optional[str] = None) -> ValueInfoProto: +def make_optional_vi(vi: ValueInfoProto, name: str | None = None) -> ValueInfoProto: """Makes a copy of `vi` with optional type.""" name = name or vi.name + ".opt" vi_type = vi.type.tensor_type @@ -40,7 +40,7 @@ def make_optional_vi(vi: ValueInfoProto, name: Optional[str] = None) -> ValueInf return opt_vi -def make_const(vi: ValueInfoProto, name: str, value: int = 0) -> Tuple[ValueInfoProto, NodeProto, TensorProto]: +def make_const(vi: ValueInfoProto, name: str, value: int = 0) -> tuple[ValueInfoProto, NodeProto, TensorProto]: """Creates a constant 1D tensor from `vi`.""" const_vi = make_vi_like(vi, name) const_shape = [d.dim_value for d in vi.type.tensor_type.shape.dim] diff --git a/onnxruntime/test/python/onnxruntime_test_python_symbolic_shape_infer.py b/onnxruntime/test/python/onnxruntime_test_python_symbolic_shape_infer.py index 2f8fb84c4c651..d311b4b8517cf 100644 --- a/onnxruntime/test/python/onnxruntime_test_python_symbolic_shape_infer.py +++ b/onnxruntime/test/python/onnxruntime_test_python_symbolic_shape_infer.py @@ -110,17 +110,17 @@ def test_mismatched_types(self): class TestSymbolicShapeInferenceForOperators(unittest.TestCase): def _check_shapes(self, graph, inferred_graph, vis): # type: (GraphProto, GraphProto, List[ValueInfoProto]) -> None names_in_vis = {x.name for x in vis} - vis = list(x for x in graph.value_info if x.name not in names_in_vis) + vis + vis = [x for x in graph.value_info if x.name not in names_in_vis] + vis inferred_vis = list(inferred_graph.value_info) - vis = list(sorted(vis, key=lambda x: x.name)) - inferred_vis = list(sorted(inferred_vis, key=lambda x: x.name)) + vis = sorted(vis, key=lambda x: x.name) + inferred_vis = sorted(inferred_vis, key=lambda x: x.name) if vis == inferred_vis: return # otherwise some custom logic to give a nicer diff vis_names = {x.name for x in vis} inferred_vis_names = {x.name for x in inferred_vis} assert vis_names == inferred_vis_names, (vis_names, inferred_vis_names) - for vi, inferred_vi in zip(vis, inferred_vis): + for vi, inferred_vi in zip(vis, inferred_vis, strict=False): assert vi == inferred_vi, f"\n{vi}\n{inferred_vi}\n" raise AssertionError() diff --git a/onnxruntime/test/python/onnxruntime_test_scatternd.py b/onnxruntime/test/python/onnxruntime_test_scatternd.py index 2a5555bba37de..ccfbe98b867b9 100644 --- a/onnxruntime/test/python/onnxruntime_test_scatternd.py +++ b/onnxruntime/test/python/onnxruntime_test_scatternd.py @@ -15,11 +15,11 @@ def has_cuda(): - available_providers = [provider for provider in onnxruntime.get_available_providers()] + available_providers = list(onnxruntime.get_available_providers()) return "CUDAExecutionProvider" in available_providers -def ignore_warnings(warns: typing.List[Warning]) -> typing.Callable: +def ignore_warnings(warns: list[Warning]) 
-> typing.Callable: def wrapper(fct): if warns is None: raise AssertionError(f"warns cannot be None for '{fct}'.") @@ -88,8 +88,8 @@ def common_scatter(self, opset, providers, dtype, reduction, expected_names): self.assertEqual(expected_names, names) sonx = str(onx).replace(" ", "").replace("\n", "|") - sexp = 'op_type:"Cast"|attribute{|name:"to"|type:INT|i:%d|}' % itype - sexp2 = 'op_type:"Cast"|attribute{|name:"to"|i:%d|type:INT|}' % itype + sexp = 'op_type:"Cast"|attribute{|name:"to"|type:INT|i:%d|}' % itype # noqa: UP031 + sexp2 = 'op_type:"Cast"|attribute{|name:"to"|i:%d|type:INT|}' % itype # noqa: UP031 assert sexp in sonx or sexp2 in sonx, f"Unable to find a substring in {sonx!r}" if providers == ["CPUExecutionProvider"]: return @@ -297,7 +297,7 @@ def _scatternd_standalone_cuda(self, reduction, line): indices = np.array([[line], [1 - line], [line]], dtype=np.int64) updates = (2 ** (np.arange(18) + 1).astype(np.float32).reshape((3, 2, 3))).astype(np.float32) - feeds = dict(data=data, indices=indices, updates=updates) + feeds = {"data": data, "indices": indices, "updates": updates} ref = ReferenceEvaluator(model) expected = ref.run(None, feeds)[0] diff --git a/onnxruntime/test/python/quantization/op_test_utils.py b/onnxruntime/test/python/quantization/op_test_utils.py index 82193d08684c6..e329b4da38f67 100644 --- a/onnxruntime/test/python/quantization/op_test_utils.py +++ b/onnxruntime/test/python/quantization/op_test_utils.py @@ -379,10 +379,10 @@ def check_op_type_count(testcase, model_path, **kwargs): if node.op_type in optype2count: optype2count[node.op_type] += 1 - for op_type in kwargs: + for op_type, value in kwargs.items(): try: testcase.assertEqual( - kwargs[op_type], + value, optype2count[op_type], f"op_type {op_type} count not same", ) @@ -414,7 +414,7 @@ def check_sign_f8_quantization(model_path_origin, model_path_to_check): scale_zp = [i.name for i in model_f8.graph.initializer if i.name.startswith(name)] if len(scale_zp) not in (1, 3): raise AssertionError( - f"Need one or three names not {scale_zp}, all names: {set(i.name for i in model_f8.graph.initializer)}." + f"Need one or three names not {scale_zp}, all names: { {i.name for i in model_f8.graph.initializer} }." 
) scale = [name for name in scale_zp if "scale" in name] zero = [name for name in scale_zp if "zero" in name] @@ -480,7 +480,7 @@ def check_model_correctness( with open(model_path_origin, "rb") as f: model_onnx = onnx.load(f) - ops_set = set(node.op_type for node in model_onnx.graph.node) + ops_set = {node.op_type for node in model_onnx.graph.node} check_reference_evaluator = not (ops_set & {"EmbedLayerNormalization", "Conv", "Attention", "Transpose"}) check_target_evaluator = False @@ -639,7 +639,7 @@ def check_qtype_by_node_type(testcase, model_to_check, check_list): tensor_name = node.input[check_item[1]] if check_item[0] == "i" else node.output[check_item[1]] if tensor_name not in value_infos and tensor_name not in initializers: raise AssertionError( - f"Unable to find tensor_name={tensor_name!r} in {list(sorted(value_infos))}\n{model}" + f"Unable to find tensor_name={tensor_name!r} in {sorted(value_infos)}\n{model}" ) if tensor_name in value_infos: vi = value_infos[tensor_name] diff --git a/onnxruntime/test/python/quantization/test_calibration.py b/onnxruntime/test/python/quantization/test_calibration.py index e7f5a959a37c5..60c5f9d404258 100644 --- a/onnxruntime/test/python/quantization/test_calibration.py +++ b/onnxruntime/test/python/quantization/test_calibration.py @@ -358,11 +358,11 @@ def test_compute_data(self): rmin = np.minimum(rmin, np.amin(output, axis=1)) rmax = np.maximum(rmax, np.amax(output, axis=1)) - min_max_pairs = list(zip(rmin, rmax)) + min_max_pairs = list(zip(rmin, rmax, strict=False)) output_names = [infer_session.get_outputs()[i].name for i in range(len(infer_session.get_outputs()))] - output_min_max_dict = dict(zip(output_names, min_max_pairs)) - for output_name in output_min_max_dict: - self.assertEqual(output_min_max_dict[output_name], tensors_range[output_name].range_value) + output_min_max_dict = dict(zip(output_names, min_max_pairs, strict=False)) + for output_name, min_max in output_min_max_dict.items(): + self.assertEqual(min_max, tensors_range[output_name].range_value) def test_histogram_calibrators_run(self): """ @@ -521,11 +521,11 @@ def test_compute_data_per_channel(self): rmin = np.minimum(rmin, np.amin(output, axis=-1)) rmax = np.maximum(rmax, np.amax(output, axis=-1)) - min_max_pairs = list(zip(rmin, rmax)) + min_max_pairs = list(zip(rmin, rmax, strict=False)) output_names = [infer_session.get_outputs()[i].name for i in range(len(infer_session.get_outputs()))] - output_min_max_dict = dict(zip(output_names, min_max_pairs)) - for output_name in output_min_max_dict: - np.testing.assert_equal(output_min_max_dict[output_name], tensors_range[output_name].range_value) + output_min_max_dict = dict(zip(output_names, min_max_pairs, strict=False)) + for output_name, min_max in output_min_max_dict.items(): + np.testing.assert_equal(min_max, tensors_range[output_name].range_value) if __name__ == "__main__": diff --git a/onnxruntime/test/python/quantization/test_conv_dynamic.py b/onnxruntime/test/python/quantization/test_conv_dynamic.py index f6ee3fe97a745..5892e18baec2e 100644 --- a/onnxruntime/test/python/quantization/test_conv_dynamic.py +++ b/onnxruntime/test/python/quantization/test_conv_dynamic.py @@ -10,9 +10,13 @@ import numpy as np import onnx from onnx import TensorProto, helper, numpy_helper -from op_test_utils import TestDataFeeds # noqa: F401 -from op_test_utils import check_op_type_order # noqa: F401 -from op_test_utils import check_model_correctness, check_op_type_count, check_qtype_by_node_type +from op_test_utils import ( + TestDataFeeds, # 
noqa: F401 + check_model_correctness, + check_op_type_count, + check_op_type_order, # noqa: F401 + check_qtype_by_node_type, +) from onnxruntime.quantization import DynamicQuantConfig, QuantType, quantize, quantize_dynamic diff --git a/onnxruntime/test/python/quantization/test_get_qdq_config.py b/onnxruntime/test/python/quantization/test_get_qdq_config.py index 58d00272475cd..25f058d8f6eac 100644 --- a/onnxruntime/test/python/quantization/test_get_qdq_config.py +++ b/onnxruntime/test/python/quantization/test_get_qdq_config.py @@ -152,7 +152,7 @@ def should_exclude_node_(model: onnx.ModelProto, node: onnx.NodeProto) -> bool: nodes_to_exclude=should_exclude_node_, ) - expected_excluded_nodes = set([node.name for node in float_model.graph.node if node.op_type == "Add"]) + expected_excluded_nodes = {node.name for node in float_model.graph.node if node.op_type == "Add"} self.assertTrue(bool(expected_excluded_nodes)) self.assertEqual(set(qdq_config.nodes_to_exclude), expected_excluded_nodes) diff --git a/onnxruntime/test/python/quantization/test_op_gemm.py b/onnxruntime/test/python/quantization/test_op_gemm.py index 96a7ab6b9d9f3..11e0bf2d40a88 100644 --- a/onnxruntime/test/python/quantization/test_op_gemm.py +++ b/onnxruntime/test/python/quantization/test_op_gemm.py @@ -550,7 +550,17 @@ def test_qgemm_ref_int8(self): C = np.array([[0, 0], [0, 0]], dtype=np.int32) scale = np.array([1], dtype=np.float32) zp = np.array([0], dtype=np.int8) - feeds = dict(A=A, scaleA=scaleA, zpA=zpA, B=B, scaleB=scaleB, zpB=zpB, C=C, scale=scale, zp=zp) + feeds = { + "A": A, + "scaleA": scaleA, + "zpA": zpA, + "B": B, + "scaleB": scaleB, + "zpB": zpB, + "C": C, + "scale": scale, + "zp": zp, + } expected = sess.run(None, feeds)[0] got = ref.run(None, feeds)[0] assert_allclose(expected, got) @@ -628,7 +638,7 @@ def test_q_ref_uint8(self): A = np.array([[2, 1], [1, 0]], dtype=np.float32) scaleA = np.array([1], dtype=np.float32) zpA = np.array([0], dtype=np.uint8) - feeds = dict(A=A, scaleA=scaleA, zpA=zpA) + feeds = {"A": A, "scaleA": scaleA, "zpA": zpA} expected = sess.run(None, feeds)[0] got = ref.run(None, feeds)[0] assert_allclose(expected, got) @@ -694,7 +704,7 @@ def test_q_ref_int8(self): A = np.array([[2, 1], [1, 0]], dtype=np.float32) scaleA = np.array([1], dtype=np.float32) zpA = np.array([0], dtype=np.int8) - feeds = dict(A=A, scaleA=scaleA, zpA=zpA) + feeds = {"A": A, "scaleA": scaleA, "zpA": zpA} expected = sess.run(None, feeds)[0] got = ref.run(None, feeds)[0] assert_allclose(expected, got) @@ -828,7 +838,7 @@ def test_dynamic_quantization(self): qpath = "test_dynamic_quantization.quantized.onnx" quantize_dynamic(model_input=model_path, model_output=qpath, use_external_data_format=True, **run_config) onx = onnx.load(qpath) - self.assertIn("DynamicQuantizeLinear", set(n.op_type for n in onx.graph.node)) + self.assertIn("DynamicQuantizeLinear", {n.op_type for n in onx.graph.node}) if __name__ == "__main__": diff --git a/onnxruntime/test/python/quantization/test_op_matmul_4bits.py b/onnxruntime/test/python/quantization/test_op_matmul_4bits.py index 292dc50124c16..ed0c65cba78ac 100644 --- a/onnxruntime/test/python/quantization/test_op_matmul_4bits.py +++ b/onnxruntime/test/python/quantization/test_op_matmul_4bits.py @@ -9,7 +9,6 @@ import unittest from importlib.util import find_spec from pathlib import Path -from typing import Dict, Tuple, Union import numpy as np import onnx @@ -28,7 +27,7 @@ def setUpClass(cls): def tearDownClass(cls): cls._tmp_model_dir.cleanup() - def fill_int4_data(self, shape: 
Union[int, Tuple[int, ...]], symmetric: bool) -> np.ndarray: + def fill_int4_data(self, shape: int | tuple[int, ...], symmetric: bool) -> np.ndarray: line = np.zeros(shape) line = line.reshape(-1) @@ -54,7 +53,7 @@ def fill_int4_data(self, shape: Union[int, Tuple[int, ...]], symmetric: bool) -> def input_feeds( self, n: int, - name2shape: Dict[str, Union[int, Tuple[int, ...]]], + name2shape: dict[str, int | tuple[int, ...]], low: int = -1, high: int = 2, dtype: type = np.float32, @@ -79,7 +78,7 @@ def construct_model_matmul(self, output_model_path: str, symmetric: bool) -> Non initializers = [] def make_matmul( - input_name, weight_shape: Union[int, Tuple[int, ...]], weight_name: str, output_name: str, node_name: str + input_name, weight_shape: int | tuple[int, ...], weight_name: str, output_name: str, node_name: str ): weight_data = self.fill_int4_data(weight_shape, symmetric).astype(np.float32) initializers.append(onnx.numpy_helper.from_array(weight_data, name=weight_name)) @@ -137,7 +136,7 @@ def construct_model_gather( initializers = [] def make_gather( - indices_name, data_shape: Union[int, Tuple[int, ...]], data_name: str, output_name: str, node_name: str + indices_name, data_shape: int | tuple[int, ...], data_name: str, output_name: str, node_name: str ): weight_data = self.fill_int4_data(data_shape, symmetric).astype( np.float32 if tdata == TensorProto.FLOAT else np.float16 @@ -184,8 +183,8 @@ def quant_test( block_size: int, is_symmetric: bool, quant_format: quant_utils.QuantFormat = quant_utils.QuantFormat.QOperator, - op_types_to_quantize: Tuple[str, ...] = ("MatMul",), - quant_axes: Tuple[Tuple[str, int], ...] = (("MatMul", 0), ("Gather", 1)), + op_types_to_quantize: tuple[str, ...] = ("MatMul",), + quant_axes: tuple[tuple[str, int], ...] 
= (("MatMul", 0), ("Gather", 1)), rtol: float = 0.01, atol: float = 0.05, ): diff --git a/onnxruntime/test/python/quantization/test_op_matmul_bnb4.py b/onnxruntime/test/python/quantization/test_op_matmul_bnb4.py index 88432d75c653e..d32abc1476600 100644 --- a/onnxruntime/test/python/quantization/test_op_matmul_bnb4.py +++ b/onnxruntime/test/python/quantization/test_op_matmul_bnb4.py @@ -9,7 +9,6 @@ import unittest from importlib.util import find_spec from pathlib import Path -from typing import Dict, Tuple, Union import numpy as np import onnx @@ -67,7 +66,7 @@ def setUpClass(cls): def tearDownClass(cls): cls._tmp_model_dir.cleanup() - def fill_bnb4_data(self, shape: Tuple[int, int], quant_type: int) -> np.ndarray: + def fill_bnb4_data(self, shape: tuple[int, int], quant_type: int) -> np.ndarray: rows, cols = shape line = np.zeros(shape) line = line.reshape(-1) @@ -84,7 +83,7 @@ def fill_bnb4_data(self, shape: Tuple[int, int], quant_type: int) -> np.ndarray: line = line.reshape(cols, rows).transpose() return line.reshape(shape) - def input_feeds(self, n: int, name2shape: Dict[str, Union[int, Tuple[int, ...]]]) -> TestDataFeeds: + def input_feeds(self, n: int, name2shape: dict[str, int | tuple[int, ...]]) -> TestDataFeeds: input_data_list = [] for _i in range(n): inputs = {} @@ -104,7 +103,7 @@ def construct_model_matmul(self, output_model_path: str, quant_type: int) -> Non output_name = "output" initializers = [] - def make_matmul(input_name, weight_shape: Union[int, Tuple[int, ...]], weight_name: str, output_name: str): + def make_matmul(input_name, weight_shape: int | tuple[int, ...], weight_name: str, output_name: str): weight_data = self.fill_bnb4_data(weight_shape, quant_type).astype(np.float32) initializers.append(onnx.numpy_helper.from_array(weight_data, name=weight_name)) return onnx.helper.make_node( diff --git a/onnxruntime/test/python/quantization/test_op_pad.py b/onnxruntime/test/python/quantization/test_op_pad.py index 755c7fae5e3e8..28dc8f4b7dee7 100644 --- a/onnxruntime/test/python/quantization/test_op_pad.py +++ b/onnxruntime/test/python/quantization/test_op_pad.py @@ -54,7 +54,7 @@ def construct_model_pad( input_tensor = helper.make_tensor_value_info("input", TensorProto.FLOAT, pad_input_shape) pad_dims_initializer = helper.make_tensor("pad_dims", TensorProto.INT64, [2 * rank], pad_dims) - output_shape = [sum(e) for e in list(zip(pad_input_shape, pad_dims[:rank], pad_dims[rank:]))] + output_shape = [sum(e) for e in list(zip(pad_input_shape, pad_dims[:rank], pad_dims[rank:], strict=False))] output_tensor = helper.make_tensor_value_info("output", TensorProto.FLOAT, output_shape) inputs = ["input", "pad_dims"] @@ -108,7 +108,7 @@ def construct_model_conv_pad( identity_node = helper.make_node("Identity", ["conv_output"], ["identity_out"], name="IdentityNode") pad_dims_initializer = helper.make_tensor("pad_dims", TensorProto.INT64, [2 * rank], pad_dims) - output_shape = [sum(e) for e in list(zip(pad_input_shape, pad_dims[:rank], pad_dims[rank:]))] + output_shape = [sum(e) for e in list(zip(pad_input_shape, pad_dims[:rank], pad_dims[rank:], strict=False))] output_tensor = helper.make_tensor_value_info("output", TensorProto.FLOAT, output_shape) pad_inputs = ["conv_output", "pad_dims"] initializers = [conv_weight_initializer, pad_dims_initializer] @@ -385,7 +385,7 @@ def construct_edge_case_model( identity_node = helper.make_node("Identity", ["conv_output"], ["identity_out"], name="IdentityNode") pad_dims_initializer = helper.make_tensor("pad_dims", TensorProto.INT64, [2 * rank], 
pad_dims) - output_shape = [sum(e) for e in list(zip(pad_input_shape, pad_dims[:rank], pad_dims[rank:]))] + output_shape = [sum(e) for e in list(zip(pad_input_shape, pad_dims[:rank], pad_dims[rank:], strict=False))] output_tensor = helper.make_tensor_value_info("output", TensorProto.FLOAT, output_shape) pad_inputs = ["conv_output", "pad_dims"] initializers = [conv_weight_initializer, pad_dims_initializer] diff --git a/onnxruntime/test/python/quantization/test_op_pooling.py b/onnxruntime/test/python/quantization/test_op_pooling.py index 539affc314ce9..5364171307fc9 100644 --- a/onnxruntime/test/python/quantization/test_op_pooling.py +++ b/onnxruntime/test/python/quantization/test_op_pooling.py @@ -10,8 +10,13 @@ import numpy as np import onnx from onnx import TensorProto, helper -from op_test_utils import check_op_nodes # noqa: F401 -from op_test_utils import TestDataFeeds, check_model_correctness, check_op_type_count, check_qtype_by_node_type +from op_test_utils import ( + TestDataFeeds, + check_model_correctness, + check_op_nodes, # noqa: F401 + check_op_type_count, + check_qtype_by_node_type, +) from onnxruntime.quantization import QuantFormat, QuantType, quantize_static diff --git a/onnxruntime/test/python/quantization/test_qdq.py b/onnxruntime/test/python/quantization/test_qdq.py index 23b397ffd80e1..178cb9d876fcf 100644 --- a/onnxruntime/test/python/quantization/test_qdq.py +++ b/onnxruntime/test/python/quantization/test_qdq.py @@ -759,12 +759,12 @@ def verify_qdq( QuantType.QInt16: TensorProto.INT16, QuantType.QUInt16: TensorProto.UINT16, } - assert ( - weight_type not in to_tensor_types or to_tensor_types[weight_type] in zero_types - ), f"weight_type={weight_type} not in zero_types={zero_types}" - assert ( - activation_type not in to_tensor_types or to_tensor_types[activation_type] in zero_types - ), f"activation_type={activation_type} not in zero_types={zero_types}" + assert weight_type not in to_tensor_types or to_tensor_types[weight_type] in zero_types, ( + f"weight_type={weight_type} not in zero_types={zero_types}" + ) + assert activation_type not in to_tensor_types or to_tensor_types[activation_type] in zero_types, ( + f"activation_type={activation_type} not in zero_types={zero_types}" + ) check_model_correctness(self, model_fp32_path, model_qdq_path, data_reader.get_next(), rtol=rtol, atol=atol) diff --git a/onnxruntime/test/python/quantization/test_qdq_loss_debug.py b/onnxruntime/test/python/quantization/test_qdq_loss_debug.py index e9108f157f953..5d70641547eae 100644 --- a/onnxruntime/test/python/quantization/test_qdq_loss_debug.py +++ b/onnxruntime/test/python/quantization/test_qdq_loss_debug.py @@ -9,7 +9,6 @@ import tempfile import unittest from pathlib import Path -from typing import Dict, List import numpy as np import onnx @@ -108,7 +107,7 @@ def rewind(self): def augment_model_collect_activations( model_path: str, augmented_model_path: str, data_reader: TestDataReader -) -> Dict[str, List[np.ndarray]]: +) -> dict[str, list[np.ndarray]]: modify_model_output_intermediate_tensors(model_path, augmented_model_path) tensor_dict = collect_activations(augmented_model_path, data_reader) @@ -149,12 +148,12 @@ def test_saved_tensors_match_internal_tensors(self): output_dict = {} output_info = infer_session.get_outputs() for batch in oracle_outputs: - for output, output_data in zip(output_info, batch): + for output, output_data in zip(output_info, batch, strict=False): output_dict.setdefault(output.name, []).append(output_data) for output_name, model_outputs in 
output_dict.items(): test_outputs = tensor_dict[output_name] - for expected, actual in zip(model_outputs, test_outputs): + for expected, actual in zip(model_outputs, test_outputs, strict=False): exp = expected.reshape(-1) act = actual.reshape(-1) np.testing.assert_equal(exp, act) diff --git a/onnxruntime/test/python/quantization/test_quantizer_shape_inference.py b/onnxruntime/test/python/quantization/test_quantizer_shape_inference.py index 2b5d1f36070e5..ffa35a010fcab 100644 --- a/onnxruntime/test/python/quantization/test_quantizer_shape_inference.py +++ b/onnxruntime/test/python/quantization/test_quantizer_shape_inference.py @@ -38,7 +38,7 @@ def test_com_microsoft(self): opset_imports=[oh.make_opsetid("", 18), oh.make_opsetid("com.microsoft", 1)], ) model_shaped = onnx.shape_inference.infer_shapes(model) - shaped_results = set(t.name for t in model_shaped.graph.value_info) + shaped_results = {t.name for t in model_shaped.graph.value_info} # every result after T1 depends on T2 coming from a node com.microsoft, # shape_inference cannot go beyond this point self.assertEqual(shaped_results, {"T1"}) diff --git a/onnxruntime/test/python/quantization/test_tensor_quant_overrides_option.py b/onnxruntime/test/python/quantization/test_tensor_quant_overrides_option.py index 41dae04f1c6ff..be10575b535e4 100644 --- a/onnxruntime/test/python/quantization/test_tensor_quant_overrides_option.py +++ b/onnxruntime/test/python/quantization/test_tensor_quant_overrides_option.py @@ -420,7 +420,7 @@ def test_qdq_overrides_per_channel2(self): ) self.assertEqual(wgt_zp.data_type, quant_type.tensor_type) - for index, (zp, scale) in enumerate(zip(wgt_zp.int32_data, wgt_sc.float_data)): + for index, (zp, scale) in enumerate(zip(wgt_zp.int32_data, wgt_sc.float_data, strict=False)): wgt_qmin, wgt_qmax = get_qmin_qmax_for_qType( wgt_zp.data_type, symmetric=True, # per-channel is always symmetric @@ -1195,7 +1195,9 @@ def test_get_qnn_qdq_config_ext_data_separate_dir(self): # get_qnn_qdq_config() should be able to validate the per-channel axis without having to load # the external weight data. 
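# (Presumably the dummy reader suffices because building the QDQ config only inspects the model
# graph itself; neither calibration data nor the external weight payload is read at this point.)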
qnn_config = get_qnn_qdq_config( - str(model_path), DummyDataReader([]), init_overrides=init_overrides # Dummy data reader does nothing + str(model_path), + DummyDataReader([]), + init_overrides=init_overrides, # Dummy data reader does nothing ) self.assertEqual(set(qnn_config.op_types_to_quantize), {"Conv"}) self.assertTrue(qnn_config.use_external_data_format) diff --git a/onnxruntime/test/python/test_pytorch_export_contrib_ops.py b/onnxruntime/test/python/test_pytorch_export_contrib_ops.py index 5e20d6b4e692a..96a9aaad3c331 100644 --- a/onnxruntime/test/python/test_pytorch_export_contrib_ops.py +++ b/onnxruntime/test/python/test_pytorch_export_contrib_ops.py @@ -43,7 +43,10 @@ def to_numpy(tensor): assert len(outputs) == len(ort_outs), "number of outputs differ" # compare onnxruntime and PyTorch results - [np.testing.assert_allclose(out, ort_out, rtol=rtol, atol=atol) for out, ort_out in zip(outputs, ort_outs)] + [ + np.testing.assert_allclose(out, ort_out, rtol=rtol, atol=atol) + for out, ort_out in zip(outputs, ort_outs, strict=False) + ] # These set of tests verify ONNX model export and compares outputs between diff --git a/onnxruntime/test/python/transformers/benchmark_gqa.py b/onnxruntime/test/python/transformers/benchmark_gqa.py index 53d015a029083..41dbdf255f35c 100644 --- a/onnxruntime/test/python/transformers/benchmark_gqa.py +++ b/onnxruntime/test/python/transformers/benchmark_gqa.py @@ -6,13 +6,12 @@ """ Benchmark performance of GroupQueryAttention. """ -from typing import Optional import torch from test_sparse_attention import GroupQueryAttentionConfig, OrtGroupQueryAttention -def get_plot_algos(sm: int, local_window_size: Optional[int]): +def get_plot_algos(sm: int, local_window_size: int | None): # GQA with local windows only works in sm=8x if sm >= 80 and local_window_size: return { @@ -36,7 +35,7 @@ def plot_prompt_performance( kv_num_heads: int, head_size: int, max_seq_len: int, - local_window_size: Optional[int] = None, + local_window_size: int | None = None, use_smooth_softmax: bool = False, ): import triton @@ -69,7 +68,7 @@ def benchmark( num_heads: int, kv_num_heads: int, head_size: int, - local_window_size: Optional[int] = None, + local_window_size: int | None = None, use_smooth_softmax: bool = False, device="cuda", ): @@ -106,7 +105,7 @@ def plot_token_performance( kv_num_heads: int, head_size: int, max_seq_len: int, - local_window_size: Optional[int] = None, + local_window_size: int | None = None, use_smooth_softmax: bool = False, ): import triton @@ -139,7 +138,7 @@ def benchmark( num_heads: int, kv_num_heads: int, head_size: int, - local_window_size: Optional[int] = None, + local_window_size: int | None = None, use_smooth_softmax: bool = False, device="cuda", ): diff --git a/onnxruntime/test/python/transformers/benchmark_gqa_windows.py b/onnxruntime/test/python/transformers/benchmark_gqa_windows.py index 79cc8e41bf343..97ff8f4b21a68 100644 --- a/onnxruntime/test/python/transformers/benchmark_gqa_windows.py +++ b/onnxruntime/test/python/transformers/benchmark_gqa_windows.py @@ -1,7 +1,6 @@ import argparse import os import time -from typing import Optional import torch from test_sparse_attention import GroupQueryAttentionConfig, OrtGroupQueryAttention @@ -36,7 +35,7 @@ def benchmark( max_seq_len: int, sequence_length: int = 1, past_sequence_length: int = 0, - local_window_size: Optional[int] = None, + local_window_size: int | None = None, use_smooth_softmax: bool = False, model_name: str = "Llama3-8B", ): diff --git 
a/onnxruntime/test/python/transformers/benchmark_mha.py b/onnxruntime/test/python/transformers/benchmark_mha.py index d922f153b4b91..d5bcabe0bf147 100644 --- a/onnxruntime/test/python/transformers/benchmark_mha.py +++ b/onnxruntime/test/python/transformers/benchmark_mha.py @@ -23,10 +23,10 @@ import sys import threading import time +from collections.abc import Callable from contextlib import nullcontext from datetime import datetime from enum import IntEnum -from typing import Callable, Dict, List, Optional, Tuple import torch import torch.utils.benchmark as benchmark @@ -56,7 +56,7 @@ def convert(format_str: str) -> int: return names.index(format_str) @staticmethod - def get_name_list() -> List[str]: + def get_name_list() -> list[str]: return ["Q,K,V", "QKV", "Q,KV", "Q,K',V'"] @@ -95,7 +95,7 @@ def __init__( max_cache_sequence_length=None, scale: float = 0.0, provider="CPUExecutionProvider", - device: Optional[torch.device] = None, + device: torch.device | None = None, enable_cuda_graph: bool = False, dtype=torch.float, use_kv_cache: bool = False, @@ -205,7 +205,7 @@ def __repr__(self): ) def shape_dict(self, input_format=None): - shapes: Dict[str, Tuple] = { + shapes: dict[str, tuple] = { "output": (self.batch_size, self.sequence_length, self.num_heads * self.head_size), } @@ -272,7 +272,7 @@ def shape_dict(self, input_format=None): return shapes def symbolic_shape_dict(self, input_format=None): - shapes: Dict[str, Tuple] = { + shapes: dict[str, tuple] = { "output": ("batch_size", "sequence_length", self.num_heads * self.head_size), } @@ -346,7 +346,7 @@ def right_side_padding_masks(self): ) if self.mask_format != AttentionMaskFormat.Mask_None: - for i, (m, n) in enumerate(zip(self.mask_index_q, self.mask_index_kv)): + for i, (m, n) in enumerate(zip(self.mask_index_q, self.mask_index_kv, strict=False)): q_mask[i, :, m:, :] = False k_mask[i, :, n:, :] = False mask[i, :, m:, :] = False @@ -660,7 +660,7 @@ def run_torch_sdpa( has_mask: bool = False, mask_dim: int = 2, mask_dtype=torch.bool, - backend: Optional[int] = None, + backend: int | None = None, repeats: int = 100, ): q_shape = (batch_size, num_heads, q_seq_len, head_size) diff --git a/onnxruntime/test/python/transformers/bert_model_generator.py b/onnxruntime/test/python/transformers/bert_model_generator.py index a84137f092e64..0bb71bd8736d4 100644 --- a/onnxruntime/test/python/transformers/bert_model_generator.py +++ b/onnxruntime/test/python/transformers/bert_model_generator.py @@ -5,7 +5,6 @@ # -------------------------------------------------------------------------- import math -from typing import List import numpy as np import onnx @@ -13,7 +12,7 @@ from packaging import version -def float_tensor(name: str, shape: List[int], random=False): +def float_tensor(name: str, shape: list[int], random=False): low = 0.0 high = 1.0 total_elements = 1 diff --git a/onnxruntime/test/python/transformers/conformer_model_generator.py b/onnxruntime/test/python/transformers/conformer_model_generator.py index 5b27a46ea0fdc..4e76478bfb649 100644 --- a/onnxruntime/test/python/transformers/conformer_model_generator.py +++ b/onnxruntime/test/python/transformers/conformer_model_generator.py @@ -4,7 +4,6 @@ # license information. 
# -------------------------------------------------------------------------- -from typing import List import numpy as np import onnx @@ -13,7 +12,7 @@ # Adapted from bert_model_generator.py -def get_tensor_and_weight(name: str, shape: List[int], random=False, zeros=False): +def get_tensor_and_weight(name: str, shape: list[int], random=False, zeros=False): low = 0.0 high = 1.0 total_elements = 1 @@ -22,7 +21,9 @@ def get_tensor_and_weight(name: str, shape: List[int], random=False, zeros=False weights = ( [np.random.uniform(low, high) for _ in range(total_elements)] if random - else [0.0] * total_elements if zeros else [1.0] * total_elements + else [0.0] * total_elements + if zeros + else [1.0] * total_elements ) return helper.make_tensor(name, TensorProto.FLOAT, shape, weights), weights diff --git a/onnxruntime/test/python/transformers/gpt2_model_generator.py b/onnxruntime/test/python/transformers/gpt2_model_generator.py index 0865c87b70da7..74136c2b8bc61 100644 --- a/onnxruntime/test/python/transformers/gpt2_model_generator.py +++ b/onnxruntime/test/python/transformers/gpt2_model_generator.py @@ -5,7 +5,6 @@ # -------------------------------------------------------------------------- import math -from typing import List # noqa: F401 import numpy import onnx diff --git a/onnxruntime/test/python/transformers/parity_utilities.py b/onnxruntime/test/python/transformers/parity_utilities.py index d7f79304d2d2b..376b684c7648c 100644 --- a/onnxruntime/test/python/transformers/parity_utilities.py +++ b/onnxruntime/test/python/transformers/parity_utilities.py @@ -115,9 +115,9 @@ def optimize_onnx( onnx_model.save_model_to_file(optimized_onnx_path) if expected_op is not None: - assert ( - len(onnx_model.get_nodes_by_op_type(expected_op)) == 1 - ), f"Expected {expected_op} node not found in the optimized model {optimized_onnx_path}" + assert len(onnx_model.get_nodes_by_op_type(expected_op)) == 1, ( + f"Expected {expected_op} node not found in the optimized model {optimized_onnx_path}" + ) def diff_outputs(torch_outputs, ort_outputs, index): diff --git a/onnxruntime/test/python/transformers/rotary_flash.py b/onnxruntime/test/python/transformers/rotary_flash.py index 4329b2c1a6057..a033805ec0d5e 100644 --- a/onnxruntime/test/python/transformers/rotary_flash.py +++ b/onnxruntime/test/python/transformers/rotary_flash.py @@ -1,8 +1,6 @@ # Copyright (c) 2023, Tri Dao. 
-from typing import Optional, Tuple, Union - import torch import triton import triton.language as tl @@ -142,9 +140,9 @@ def apply_rotary( x: torch.Tensor, cos: torch.Tensor, sin: torch.Tensor, - seqlen_offsets: Union[int, torch.Tensor] = 0, - cu_seqlens: Optional[torch.Tensor] = None, - max_seqlen: Optional[int] = None, + seqlen_offsets: int | torch.Tensor = 0, + cu_seqlens: torch.Tensor | None = None, + max_seqlen: int | None = None, interleaved=False, inplace=False, conjugate=False, @@ -265,9 +263,9 @@ def forward( sin, interleaved=False, inplace=False, - seqlen_offsets: Union[int, torch.Tensor] = 0, - cu_seqlens: Optional[torch.Tensor] = None, - max_seqlen: Optional[int] = None, + seqlen_offsets: int | torch.Tensor = 0, + cu_seqlens: torch.Tensor | None = None, + max_seqlen: int | None = None, ): out = apply_rotary( x, @@ -321,9 +319,9 @@ def apply_rotary_emb( sin, interleaved=False, inplace=False, - seqlen_offsets: Union[int, torch.Tensor] = 0, - cu_seqlens: Optional[torch.Tensor] = None, - max_seqlen: Optional[int] = None, + seqlen_offsets: int | torch.Tensor = 0, + cu_seqlens: torch.Tensor | None = None, + max_seqlen: int | None = None, ): """ Arguments: @@ -360,7 +358,7 @@ def forward( cos_k=None, sin_k=None, interleaved=False, - seqlen_offsets: Union[int, torch.Tensor] = 0, + seqlen_offsets: int | torch.Tensor = 0, ): batch, seqlen, three, nheads, headdim = qkv.shape assert three == 3 @@ -432,7 +430,7 @@ def apply_rotary_emb_qkv_( cos_k=None, sin_k=None, interleaved=False, - seqlen_offsets: Union[int, torch.Tensor] = 0, + seqlen_offsets: int | torch.Tensor = 0, ): """ Arguments: @@ -453,7 +451,7 @@ def apply_rotary_emb_qkv_( class ApplyRotaryEmbKV(torch.autograd.Function): @staticmethod - def forward(ctx, kv, cos, sin, interleaved=False, seqlen_offsets: Union[int, torch.Tensor] = 0): + def forward(ctx, kv, cos, sin, interleaved=False, seqlen_offsets: int | torch.Tensor = 0): batch, seqlen, two, nheads, headdim = kv.shape assert two == 2 k = kv[:, :, 0] @@ -491,7 +489,7 @@ def apply_rotary_emb_kv_( cos, sin, interleaved=False, - seqlen_offsets: Union[int, torch.Tensor] = 0, + seqlen_offsets: int | torch.Tensor = 0, ): """ Arguments: @@ -623,10 +621,10 @@ def _update_cos_sin_cache(self, seqlen, device=None, dtype=None): def forward( self, qkv: torch.Tensor, - kv: Optional[torch.Tensor] = None, - seqlen_offset: Union[int, torch.Tensor] = 0, - max_seqlen: Optional[int] = None, - ) -> Union[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]]: + kv: torch.Tensor | None = None, + seqlen_offset: int | torch.Tensor = 0, + max_seqlen: int | None = None, + ) -> torch.Tensor | tuple[torch.Tensor, torch.Tensor]: """ qkv: (batch, seqlen, 3, nheads, headdim) if kv is none, else it's just q of shape (batch, seqlen, nheads, headdim) diff --git a/onnxruntime/test/python/transformers/test_data/bert_squad_tensorflow2.1_keras2onnx_opset11/generate_tiny_keras2onnx_bert_models.py b/onnxruntime/test/python/transformers/test_data/bert_squad_tensorflow2.1_keras2onnx_opset11/generate_tiny_keras2onnx_bert_models.py index c1e95f35a633b..b69cae58b963c 100644 --- a/onnxruntime/test/python/transformers/test_data/bert_squad_tensorflow2.1_keras2onnx_opset11/generate_tiny_keras2onnx_bert_models.py +++ b/onnxruntime/test/python/transformers/test_data/bert_squad_tensorflow2.1_keras2onnx_opset11/generate_tiny_keras2onnx_bert_models.py @@ -291,9 +291,9 @@ def resize_model(self): reshapes[initializer.name] = new_shape print("initializer", initializer.name, tensor.shape, "=>", new_shape) - for initializer_name in reshapes: 
+ for initializer_name, reshape_name in reshapes.items(): self.replace_input_of_all_nodes(initializer_name, initializer_name + "_resize") - tensor = self.resize_weight(initializer_name, reshapes[initializer_name]) + tensor = self.resize_weight(initializer_name, reshape_name) self.model.graph.initializer.extend([tensor]) self.use_dynamic_axes() diff --git a/onnxruntime/test/python/transformers/test_data/gpt2_pytorch1.5_opset11/generate_tiny_gpt2_model.py b/onnxruntime/test/python/transformers/test_data/gpt2_pytorch1.5_opset11/generate_tiny_gpt2_model.py index 4a4a0bc2c5098..fc0f1e01f7829 100644 --- a/onnxruntime/test/python/transformers/test_data/gpt2_pytorch1.5_opset11/generate_tiny_gpt2_model.py +++ b/onnxruntime/test/python/transformers/test_data/gpt2_pytorch1.5_opset11/generate_tiny_gpt2_model.py @@ -331,9 +331,9 @@ def resize_model(self): reshapes[initializer.name] = new_shape print("initializer", initializer.name, tensor.shape, "=>", new_shape) - for initializer_name in reshapes: + for initializer_name, reshape_name in reshapes.items(): self.replace_input_of_all_nodes(initializer_name, initializer_name + "_resize") - tensor = self.resize_weight(initializer_name, reshapes[initializer_name]) + tensor = self.resize_weight(initializer_name, reshape_name) self.model.graph.initializer.extend([tensor]) # Add node name, replace split node attribute. diff --git a/onnxruntime/test/python/transformers/test_data/models/phi-3.5-v-instruct-vision-attention.onnx b/onnxruntime/test/python/transformers/test_data/models/phi-3.5-v-instruct-vision-attention.onnx new file mode 100644 index 0000000000000..a1a1bfc2a8f64 Binary files /dev/null and b/onnxruntime/test/python/transformers/test_data/models/phi-3.5-v-instruct-vision-attention.onnx differ diff --git a/onnxruntime/test/python/transformers/test_data/models/phi-3.5-v-instruct-vision-layernorm.onnx b/onnxruntime/test/python/transformers/test_data/models/phi-3.5-v-instruct-vision-layernorm.onnx new file mode 100644 index 0000000000000..7f312ddeef140 Binary files /dev/null and b/onnxruntime/test/python/transformers/test_data/models/phi-3.5-v-instruct-vision-layernorm.onnx differ diff --git a/onnxruntime/test/python/transformers/test_data/models/phi-3.5-v-instruct-vision-quickgelu.onnx b/onnxruntime/test/python/transformers/test_data/models/phi-3.5-v-instruct-vision-quickgelu.onnx new file mode 100644 index 0000000000000..23576b11c8cf0 Binary files /dev/null and b/onnxruntime/test/python/transformers/test_data/models/phi-3.5-v-instruct-vision-quickgelu.onnx differ
diff --git a/onnxruntime/test/python/transformers/test_gemmfastgelu_fusion.py b/onnxruntime/test/python/transformers/test_gemmfastgelu_fusion.py index 431ae21cd5eaf..c4c136981e7a9 100644 --- a/onnxruntime/test/python/transformers/test_gemmfastgelu_fusion.py +++ b/onnxruntime/test/python/transformers/test_gemmfastgelu_fusion.py @@ -6,7 +6,6 @@ import os import unittest -from typing import List import numpy as np import onnx @@ -33,7 +32,7 @@ opsets = [onnxdomain, msdomain] -def float_tensor(name: str, shape: List[int], random=False): +def float_tensor(name: str, shape: list[int], random=False): low = 0.0 high = 1.0 total_elements = 1 diff --git a/onnxruntime/test/python/transformers/test_group_norm.py b/onnxruntime/test/python/transformers/test_group_norm.py index bf295a65c8b53..7a04df8b39c0d 100644 --- a/onnxruntime/test/python/transformers/test_group_norm.py +++ b/onnxruntime/test/python/transformers/test_group_norm.py @@ -7,7 +7,6 @@ from dataclasses import dataclass from enum import Enum from time import perf_counter -from typing import Optional, Tuple import numpy import torch @@ -215,11 +214,11 @@ def group_norm_ort( src: torch.Tensor, gamma: torch.Tensor, beta: torch.Tensor, - skip: Optional[torch.Tensor], - bias: Optional[torch.Tensor], + skip: torch.Tensor | None, + bias: torch.Tensor | None, config: GroupNormConfig, measure_latency=False, -) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[float]]: +) -> tuple[torch.Tensor, torch.Tensor | None, float | None]: onnx_model_str = create_group_norm_graph(config) ort_session = InferenceSession(onnx_model_str, providers=["CUDAExecutionProvider"]) @@ -276,10 +275,10 @@ def group_norm_torch( src: torch.Tensor, gamma: torch.Tensor, beta: torch.Tensor, - skip: Optional[torch.Tensor], - bias: Optional[torch.Tensor], + skip: torch.Tensor | None, + bias: torch.Tensor | None, config: GroupNormConfig, -) -> Tuple[torch.Tensor, Optional[torch.Tensor]]: +) -> tuple[torch.Tensor, torch.Tensor | None]: add_out = src if skip is not None: diff --git a/onnxruntime/test/python/transformers/test_mha.py b/onnxruntime/test/python/transformers/test_mha.py index 45726ecc7c2b0..dc19e3ec95243 100644 --- a/onnxruntime/test/python/transformers/test_mha.py +++ b/onnxruntime/test/python/transformers/test_mha.py @@ -11,7 +11,6 @@ import itertools import os import unittest -from typing import Dict, List, Optional import numpy import torch @@ -102,9 +101,9 @@ def attention_reference( query: torch.Tensor, key: torch.Tensor, value: torch.Tensor, - scale: Optional[float] = None, - attn_bias: Optional[torch.Tensor] = None, - mask: Optional[torch.Tensor] = None, + scale: float | None = None, + attn_bias: torch.Tensor | None = None, + mask: torch.Tensor | None = None, verbose: bool = False, ) -> torch.Tensor: """Reference implementation of SDPA @@ -171,26 +170,26 @@ def attention_reference( def mha_with_past_reference( config: MultiHeadAttentionConfig, - past_k: Optional[torch.Tensor], - past_v: Optional[torch.Tensor], + past_k: torch.Tensor | None, + past_v: torch.Tensor | None, q: torch.Tensor, k: torch.Tensor, v: torch.Tensor, - scale: Optional[float] = None, - attn_bias: Optional[torch.Tensor] = None, - mask: Optional[torch.Tensor] = None, + scale: float | None = None, + attn_bias: torch.Tensor | None = None, + mask: torch.Tensor | None = None, ): assert config.kv_sequence_length == config.sequence_length assert config.use_kv_cache if past_k is not None: - assert ( - past_k.dim() == 4
and k.dim() == 4 and past_k.size(1) == k.size(1) - ), f"expect BNSH format: {past_k.shape=} {k.shape=}" + assert past_k.dim() == 4 and k.dim() == 4 and past_k.size(1) == k.size(1), ( + f"expect BNSH format: {past_k.shape=} {k.shape=}" + ) if past_v is not None: - assert ( - past_v.dim() == 4 and v.dim() == 4 and past_v.size(1) == v.size(1) - ), f"expect BNSH format: {past_v.shape=} {v.shape=}" + assert past_v.dim() == 4 and v.dim() == 4 and past_v.size(1) == v.size(1), ( + f"expect BNSH format: {past_v.shape=} {v.shape=}" + ) present_k = torch.cat((past_k, k), dim=2) if past_k is not None else k present_v = torch.cat((past_v, v), dim=2) if past_v is not None else v @@ -533,7 +532,6 @@ def causal_mask(seqlen_q, seqlen_k, query_padding_mask=None, key_padding_mask=No def merge_padding_and_causal_masks(config): - q_mask, k_mask, mask = config.right_side_padding_masks() if config.causal: query_padding_mask = q_mask.reshape(config.batch_size, config.sequence_length) @@ -649,7 +647,7 @@ def parity_check_mha( def parity_check_mha_multi_threading( - test_inputs: List[Dict], + test_inputs: list[dict], rtol: float = 1e-3, atol: float = 1e-3, attention_kernel=SdpaKernel.DEFAULT, diff --git a/onnxruntime/test/python/transformers/test_optimizer_stable_diffusion.py b/onnxruntime/test/python/transformers/test_optimizer_stable_diffusion.py index dca250f39fae2..692382a12da9f 100644 --- a/onnxruntime/test/python/transformers/test_optimizer_stable_diffusion.py +++ b/onnxruntime/test/python/transformers/test_optimizer_stable_diffusion.py @@ -29,6 +29,8 @@ TINY_MODELS = { "stable-diffusion": "hf-internal-testing/tiny-stable-diffusion-torch", "stable-diffusion-xl": "echarlaix/tiny-random-stable-diffusion-xl", + "stable-diffusion-3": "optimum-internal-testing/tiny-random-stable-diffusion-3", + "flux": "tlwu/tiny-random-flux", } @@ -114,157 +116,287 @@ def test_clip_sd(self): float16=True, ) - @pytest.mark.slow - def test_clip_sdxl(self): - save_directory = "tiny-random-stable-diffusion-xl" - if os.path.exists(save_directory): - shutil.rmtree(save_directory, ignore_errors=True) - - model_type = "stable-diffusion-xl" - model_name = TINY_MODELS[model_type] - - from optimum.onnxruntime import ORTStableDiffusionXLPipeline - - base = ORTStableDiffusionXLPipeline.from_pretrained(model_name, export=True) - base.save_pretrained(save_directory) - - clip_onnx_path = os.path.join(save_directory, "text_encoder", "model.onnx") - optimized_clip_onnx_path = os.path.join(save_directory, "text_encoder", "opt.onnx") - self.verify_clip_optimizer( - clip_onnx_path, - optimized_clip_onnx_path, - expected_counters={ - "EmbedLayerNormalization": 0, - "Attention": 5, - "SkipLayerNormalization": 10, - "LayerNormalization": 1, - "Gelu": 0, - "BiasGelu": 5, - }, - ) - - clip_onnx_path = os.path.join(save_directory, "text_encoder_2", "model.onnx") - optimized_clip_onnx_path = os.path.join(save_directory, "text_encoder_2", "opt.onnx") - self.verify_clip_optimizer( - clip_onnx_path, - optimized_clip_onnx_path, - expected_counters={ - "EmbedLayerNormalization": 0, - "Attention": 5, - "SkipLayerNormalization": 10, - "LayerNormalization": 1, - "Gelu": 0, - "BiasGelu": 5, - }, - ) - - @pytest.mark.slow - def test_optimize_sdxl_fp32(self): - save_directory = "tiny-random-stable-diffusion-xl" - if os.path.exists(save_directory): - shutil.rmtree(save_directory, ignore_errors=True) - model_type = "stable-diffusion-xl" - model_name = TINY_MODELS[model_type] - - from optimum.onnxruntime import ORTStableDiffusionXLPipeline +class 
TestStableDiffusionOrFluxPipelineOptimization(unittest.TestCase): + def verify_pipeline_optimization( + self, + model_name, + export_onnx_dir, + optimized_onnx_dir, + expected_op_counters, + is_float16, + atol, + disable_group_norm=False, + ): + from optimum.onnxruntime import ORTPipelineForText2Image - baseline = ORTStableDiffusionXLPipeline.from_pretrained(model_name, export=True) - if not os.path.exists(save_directory): - baseline.save_pretrained(save_directory) + if os.path.exists(export_onnx_dir): + shutil.rmtree(export_onnx_dir, ignore_errors=True) - batch_size, num_images_per_prompt, height, width = 2, 2, 64, 64 - latents = baseline.prepare_latents( - batch_size * num_images_per_prompt, - baseline.unet.config["in_channels"], - height, - width, - dtype=np.float32, - generator=np.random.RandomState(0), - ) + baseline = ORTPipelineForText2Image.from_pretrained(model_name, export=True, provider="CUDAExecutionProvider") + if not os.path.exists(export_onnx_dir): + baseline.save_pretrained(export_onnx_dir) - optimized_directory = "tiny-random-stable-diffusion-xl-optimized" argv = [ "--input", - save_directory, + export_onnx_dir, "--output", - optimized_directory, - "--disable_group_norm", - "--disable_bias_splitgelu", + optimized_onnx_dir, "--overwrite", + "--disable_bias_splitgelu", ] - optimize_stable_diffusion(argv) - treatment = ORTStableDiffusionXLPipeline.from_pretrained(optimized_directory, provider="CUDAExecutionProvider") + if disable_group_norm: + argv.append("--disable_group_norm") + + if is_float16: + argv.append("--float16") + + op_counters = optimize_stable_diffusion(argv) + print(op_counters) + + for name in expected_op_counters: + self.assertIn(name, op_counters) + for op, count in expected_op_counters[name].items(): + self.assertIn(op, op_counters[name]) + self.assertEqual(op_counters[name][op], count, f"Expected {count} {op} in {name}") + + treatment = ORTPipelineForText2Image.from_pretrained(optimized_onnx_dir, provider="CUDAExecutionProvider") + batch_size, num_images_per_prompt, height, width = 1, 1, 64, 64 inputs = { "prompt": ["starry night by van gogh"] * batch_size, - "num_inference_steps": 3, + "num_inference_steps": 20, "num_images_per_prompt": num_images_per_prompt, "height": height, "width": width, - "guidance_rescale": 0.1, "output_type": "np", } - ort_outputs_1 = baseline(latents=latents, **inputs) - ort_outputs_2 = treatment(latents=latents, **inputs) - self.assertTrue(np.allclose(ort_outputs_1.images[0], ort_outputs_2.images[0], atol=1e-3)) + seed = 123 + np.random.seed(seed) + import torch + + baseline_outputs = baseline(**inputs, generator=torch.Generator(device="cuda").manual_seed(seed)) + + np.random.seed(seed) + treatment_outputs = treatment(**inputs, generator=torch.Generator(device="cuda").manual_seed(seed)) + + self.assertTrue(np.allclose(baseline_outputs.images[0], treatment_outputs.images[0], atol=atol)) @pytest.mark.slow - def test_optimize_sdxl_fp16(self): - """This tests optimized fp16 pipeline, and result is deterministic for a given seed""" - save_directory = "tiny-random-stable-diffusion-xl" - if os.path.exists(save_directory): - shutil.rmtree(save_directory, ignore_errors=True) + def test_sd(self): + """This tests optimization of stable diffusion 1.x pipeline""" + model_name = TINY_MODELS["stable-diffusion"] + + expected_op_counters = { + "unet": { + "Attention": 6, + "MultiHeadAttention": 6, + "LayerNormalization": 6, + "SkipLayerNormalization": 12, + "BiasSplitGelu": 0, + "GroupNorm": 0, + "SkipGroupNorm": 0, + "NhwcConv": 47, + 
"BiasAdd": 0, + }, + "vae_encoder": {"Attention": 0, "GroupNorm": 0, "SkipGroupNorm": 0, "NhwcConv": 13}, + "vae_decoder": {"Attention": 0, "GroupNorm": 0, "SkipGroupNorm": 0, "NhwcConv": 17}, + "text_encoder": { + "Attention": 5, + "Gelu": 0, + "LayerNormalization": 1, + "QuickGelu": 5, + "BiasGelu": 0, + "SkipLayerNormalization": 10, + }, + } - model_type = "stable-diffusion-xl" - model_name = TINY_MODELS[model_type] + export_onnx_dir = "tiny-random-sd" + optimized_onnx_dir = "tiny-random-sd-optimized-fp32" + # Disable GroupNorm due to limitation of current cuda kernel implementation. + self.verify_pipeline_optimization( + model_name, + export_onnx_dir, + optimized_onnx_dir, + expected_op_counters, + is_float16=False, + atol=5e-3, + disable_group_norm=True, + ) - from optimum.onnxruntime import ORTStableDiffusionXLPipeline + expected_op_counters["unet"].update({"Attention": 0, "MultiHeadAttention": 12}) + optimized_onnx_dir = "tiny-random-sd-optimized-fp16" + self.verify_pipeline_optimization( + model_name, + export_onnx_dir, + optimized_onnx_dir, + expected_op_counters, + is_float16=True, + atol=5e-2, + disable_group_norm=True, + ) - baseline = ORTStableDiffusionXLPipeline.from_pretrained(model_name, export=True) - if not os.path.exists(save_directory): - baseline.save_pretrained(save_directory) + @pytest.mark.slow + def test_sdxl(self): + """This tests optimization of SDXL pipeline""" + model_name = TINY_MODELS["stable-diffusion-xl"] + + expected_op_counters = { + "unet": { + "Attention": 12, + "MultiHeadAttention": 12, + "LayerNormalization": 6, + "SkipLayerNormalization": 30, + "BiasSplitGelu": 0, + "GroupNorm": 0, + "SkipGroupNorm": 0, + "NhwcConv": 35, + "BiasAdd": 0, + }, + "vae_encoder": {"Attention": 0, "GroupNorm": 0, "SkipGroupNorm": 0, "NhwcConv": 13}, + "vae_decoder": {"Attention": 0, "GroupNorm": 0, "SkipGroupNorm": 0, "NhwcConv": 17}, + "text_encoder": { + "Attention": 5, + "Gelu": 0, + "LayerNormalization": 1, + "QuickGelu": 0, + "BiasGelu": 5, + "SkipLayerNormalization": 10, + }, + "text_encoder_2": { + "Attention": 5, + "Gelu": 0, + "LayerNormalization": 1, + "QuickGelu": 0, + "BiasGelu": 5, + "SkipLayerNormalization": 10, + }, + } - optimized_directory = "tiny-random-stable-diffusion-xl-optimized-fp16" - argv = [ - "--input", - save_directory, - "--output", - optimized_directory, - "--disable_group_norm", - "--disable_bias_splitgelu", - "--float16", - "--overwrite", - ] - optimize_stable_diffusion(argv) + export_onnx_dir = "tiny-random-sdxl" + optimized_onnx_dir = "tiny-random-sdxl-optimized-fp32" + # Disable GroupNorm due to limitation of current cuda kernel implementation. 
+ self.verify_pipeline_optimization( + model_name, + export_onnx_dir, + optimized_onnx_dir, + expected_op_counters, + is_float16=False, + atol=5e-3, + disable_group_norm=True, + ) - fp16_pipeline = ORTStableDiffusionXLPipeline.from_pretrained( - optimized_directory, provider="CUDAExecutionProvider" + expected_op_counters["unet"].update({"Attention": 0, "MultiHeadAttention": 24}) + optimized_onnx_dir = "tiny-random-sdxl-optimized-fp16" + self.verify_pipeline_optimization( + model_name, + export_onnx_dir, + optimized_onnx_dir, + expected_op_counters, + is_float16=True, + atol=5e-2, + disable_group_norm=True, ) - batch_size, num_images_per_prompt, height, width = 1, 1, 64, 64 - inputs = { - "prompt": ["starry night by van gogh"] * batch_size, - "num_inference_steps": 3, - "num_images_per_prompt": num_images_per_prompt, - "height": height, - "width": width, - "guidance_rescale": 0.1, - "output_type": "latent", + + @pytest.mark.slow + def test_sd3(self): + """This tests optimization of stable diffusion 3 pipeline""" + model_name = TINY_MODELS["stable-diffusion-3"] + + expected_op_counters = { + "transformer": { + "FastGelu": 3, + "MultiHeadAttention": 2, + "LayerNormalization": 8, + "SimplifiedLayerNormalization": 0, + }, + "vae_encoder": {"Attention": 0, "GroupNorm": 10, "SkipGroupNorm": 3, "NhwcConv": 17}, + "vae_decoder": {"Attention": 0, "GroupNorm": 14, "SkipGroupNorm": 7, "NhwcConv": 25}, + "text_encoder": { + "Attention": 2, + "Gelu": 0, + "LayerNormalization": 1, + "QuickGelu": 2, + "SkipLayerNormalization": 4, + }, + "text_encoder_2": { + "Attention": 2, + "Gelu": 0, + "LayerNormalization": 1, + "QuickGelu": 0, + "SkipLayerNormalization": 4, + }, + "text_encoder_3": { + "Attention": 2, + "MultiHeadAttention": 0, + "Gelu": 0, + "FastGelu": 2, + "BiasGelu": 0, + "GemmFastGelu": 0, + "LayerNormalization": 0, + "SimplifiedLayerNormalization": 2, + "SkipLayerNormalization": 0, + "SkipSimplifiedLayerNormalization": 3, + }, } - seed = 123 - np.random.seed(seed) - ort_outputs_1 = fp16_pipeline(**inputs) + export_onnx_dir = "tiny-random-stable-diffusion-3" + optimized_onnx_dir = "tiny-random-stable-diffusion-3-optimized-fp32" + self.verify_pipeline_optimization( + model_name, export_onnx_dir, optimized_onnx_dir, expected_op_counters, is_float16=False, atol=5e-3 + ) - np.random.seed(seed) - ort_outputs_2 = fp16_pipeline(**inputs) + optimized_onnx_dir = "tiny-random-stable-diffusion-3-optimized-fp16" + self.verify_pipeline_optimization( + model_name, export_onnx_dir, optimized_onnx_dir, expected_op_counters, is_float16=True, atol=5e-2 + ) - np.random.seed(seed) - ort_outputs_3 = fp16_pipeline(**inputs) + @pytest.mark.slow + def test_flux(self): + """This tests optimization of flux pipeline""" + model_name = TINY_MODELS["flux"] + + expected_op_counters = { + "transformer": { + "FastGelu": 8, + "MultiHeadAttention": 6, + "LayerNormalization": 13, + "SimplifiedLayerNormalization": 16, + }, + "vae_encoder": {"Attention": 0, "GroupNorm": 10, "SkipGroupNorm": 3, "NhwcConv": 17}, + "vae_decoder": {"Attention": 0, "GroupNorm": 14, "SkipGroupNorm": 7, "NhwcConv": 25}, + "text_encoder": { + "Attention": 2, + "Gelu": 0, + "LayerNormalization": 1, + "QuickGelu": 2, + "SkipLayerNormalization": 4, + }, + "text_encoder_2": { + "Attention": 2, + "MultiHeadAttention": 0, + "Gelu": 0, + "FastGelu": 2, + "BiasGelu": 0, + "GemmFastGelu": 0, + "LayerNormalization": 0, + "SimplifiedLayerNormalization": 2, + "SkipLayerNormalization": 0, + "SkipSimplifiedLayerNormalization": 3, + }, + } - 
self.assertTrue(np.array_equal(ort_outputs_1.images[0], ort_outputs_2.images[0])) - self.assertTrue(np.array_equal(ort_outputs_1.images[0], ort_outputs_3.images[0])) + export_onnx_dir = "tiny-random-flux" + optimized_onnx_dir = "tiny-random-flux-optimized-fp32" + self.verify_pipeline_optimization( + model_name, export_onnx_dir, optimized_onnx_dir, expected_op_counters, is_float16=False, atol=1e-3 + ) + + optimized_onnx_dir = "tiny-random-flux-optimized-fp16" + self.verify_pipeline_optimization( + model_name, export_onnx_dir, optimized_onnx_dir, expected_op_counters, is_float16=True, atol=5e-2 + ) if __name__ == "__main__": diff --git a/onnxruntime/test/python/transformers/test_parity_decoder_attention.py b/onnxruntime/test/python/transformers/test_parity_decoder_attention.py index e870e7f95fcee..8b4a68402f995 100644 --- a/onnxruntime/test/python/transformers/test_parity_decoder_attention.py +++ b/onnxruntime/test/python/transformers/test_parity_decoder_attention.py @@ -10,7 +10,6 @@ # license information. # ------------------------------------------------------------------------- -from typing import List, Optional, Tuple import numpy import torch @@ -118,7 +117,7 @@ def forward( self, query, key, - layer_state: Optional[List[Tensor]], + layer_state: list[Tensor] | None, encoder_decoder_attention: bool, use_past=torch.tensor(False), # noqa: B008 ): @@ -182,13 +181,13 @@ def forward( self, query, key: Tensor, - key_padding_mask: Optional[Tensor] = None, - layer_state: Optional[List[Tensor]] = None, - attn_mask: Optional[Tensor] = None, + key_padding_mask: Tensor | None = None, + layer_state: list[Tensor] | None = None, + attn_mask: Tensor | None = None, output_attentions: bool = False, use_past=torch.tensor(False), # noqa: B008 has_key_padding_mask: bool = False, - ) -> Tuple[Tensor, Optional[Tensor]]: + ) -> tuple[Tensor, Tensor | None]: """Input shape: Time(SeqLen) x Batch x Channel""" static_kv: bool = self.encoder_decoder_attention tgt_len, bsz, embed_dim = query.size() @@ -241,13 +240,13 @@ def ort_forward( self, query, key: Tensor, - key_padding_mask: Optional[Tensor] = None, - layer_state: Optional[List[Tensor]] = None, - attn_mask: Optional[Tensor] = None, + key_padding_mask: Tensor | None = None, + layer_state: list[Tensor] | None = None, + attn_mask: Tensor | None = None, output_attentions: bool = False, use_past=torch.tensor(False), # noqa: B008 has_key_padding_mask: bool = False, - ) -> Tuple[Tensor, Optional[Tensor]]: + ) -> tuple[Tensor, Tensor | None]: """Input shape: Time(SeqLen) x Batch x Channel""" # For readability static_kv = bool(self.encoder_decoder_attention) diff --git a/onnxruntime/test/python/transformers/test_parity_t5_mha.py b/onnxruntime/test/python/transformers/test_parity_t5_mha.py index 84708ddcf82a5..7eae2f0a231d4 100644 --- a/onnxruntime/test/python/transformers/test_parity_t5_mha.py +++ b/onnxruntime/test/python/transformers/test_parity_t5_mha.py @@ -418,9 +418,9 @@ def torch_forward( real_seq_length = seq_length if past_key_value is not None: - assert ( - len(past_key_value) == 2 - ), f"past_key_value should have 2 past states: keys and values. Got { len(past_key_value)} past states" + assert len(past_key_value) == 2, ( + f"past_key_value should have 2 past states: keys and values. 
Got {len(past_key_value)} past states" + ) real_seq_length += past_key_value[0].shape[2] if query_length is None else query_length key_length = real_seq_length if key_value_states is None else key_value_states.shape[1] @@ -538,9 +538,9 @@ def ort_forward( real_seq_length = seq_length if past_key_value is not None: - assert ( - len(past_key_value) == 2 - ), f"past_key_value should have 2 past states: keys and values. Got { len(past_key_value)} past states" + assert len(past_key_value) == 2, ( + f"past_key_value should have 2 past states: keys and values. Got {len(past_key_value)} past states" + ) real_seq_length += past_key_value[0].shape[2] if query_length is None else query_length def project(hidden_states, proj_layer, key_value_states, past_key_value): diff --git a/onnxruntime/test/python/transformers/test_phi_vision.py b/onnxruntime/test/python/transformers/test_phi_vision.py new file mode 100644 index 0000000000000..67f89e633a146 --- /dev/null +++ b/onnxruntime/test/python/transformers/test_phi_vision.py @@ -0,0 +1,254 @@ +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for +# license information. +# -------------------------------------------------------------------------- + +import os +import unittest + +import onnx +import torch +from parity_utilities import find_transformers_source + +if find_transformers_source(): + from fusion_options import FusionOptions + from onnx_model import OnnxModel + from optimizer import optimize_model +else: + from onnxruntime.transformers.fusion_options import FusionOptions + from onnxruntime.transformers.onnx_model import OnnxModel + from onnxruntime.transformers.optimizer import optimize_model + + +# From https://github.com/huggingface/transformers/blob/34f76bb62b915b43617aa88557aea97840e163f0/src/transformers/activations.py#L90 +class PhiVCLIPQuickGelu(torch.nn.Module): + def __init__(self): + super().__init__() + + def forward(self, x): + return x * torch.sigmoid(1.702 * x) + + +# Line-by-line calculation of https://github.com/huggingface/transformers/blob/34f76bb62b915b43617aa88557aea97840e163f0/src/transformers/models/clip/modeling_clip.py#L613 +class PhiVCLIPLayerNorm(torch.nn.Module): + def __init__(self): + super().__init__() + self.weight = torch.nn.Parameter(torch.ones(20)).to(torch.float16).detach() + self.bias = torch.nn.Parameter(torch.ones(20)).to(torch.float16).detach() + self.eps = 1e-05 + + def forward(self, x): + mean = x.mean(-1, keepdim=True) + diff = (x - mean).to(torch.float64) + variance = diff.pow(2).mean(-1, keepdim=True) + x = diff / torch.sqrt(variance + self.eps) + x = x.to(torch.float16) * self.weight + self.bias + return x + + +# From https://github.com/huggingface/transformers/blob/34f76bb62b915b43617aa88557aea97840e163f0/src/transformers/models/clip/modeling_clip.py#L300 +class PhiVCLIPAttention(torch.nn.Module): + def __init__(self): + super().__init__() + self.embed_dim = 20 + self.num_heads = 2 + self.head_dim = self.embed_dim // self.num_heads + + self.scale = self.head_dim**-0.5 + + self.k_proj = torch.nn.Linear(self.embed_dim, self.embed_dim) + self.v_proj = torch.nn.Linear(self.embed_dim, self.embed_dim) + self.q_proj = torch.nn.Linear(self.embed_dim, self.embed_dim) + self.out_proj = torch.nn.Linear(self.embed_dim, self.embed_dim) + + self.k_proj.weight.data.fill_(1) + self.k_proj.bias.data.fill_(1) + self.v_proj.weight.data.fill_(1) + 
self.v_proj.bias.data.fill_(1) + self.q_proj.weight.data.fill_(1) + self.q_proj.bias.data.fill_(1) + self.out_proj.weight.data.fill_(1) + self.out_proj.bias.data.fill_(1) + + def _shape(self, tensor: torch.Tensor, seq_len: int, bsz: int): + return tensor.view(bsz, seq_len, self.num_heads, self.head_dim).transpose(1, 2).contiguous() + + def forward( + self, + hidden_states, + attention_mask=None, + causal_attention_mask=None, + output_attentions=False, + ): + """Input shape: Batch x Time x Channel""" + + bsz, tgt_len, embed_dim = hidden_states.size() + + # get query proj + query_states = self.q_proj(hidden_states) * self.scale + key_states = self._shape(self.k_proj(hidden_states), -1, bsz) + value_states = self._shape(self.v_proj(hidden_states), -1, bsz) + + proj_shape = (bsz * self.num_heads, -1, self.head_dim) + query_states = self._shape(query_states, tgt_len, bsz).view(*proj_shape) + key_states = key_states.view(*proj_shape) + value_states = value_states.view(*proj_shape) + + src_len = key_states.size(1) + attn_weights = torch.bmm(query_states, key_states.transpose(1, 2)) + + if attn_weights.size() != (bsz * self.num_heads, tgt_len, src_len): + raise ValueError( + f"Attention weights should be of size {(bsz * self.num_heads, tgt_len, src_len)}, but is" + f" {attn_weights.size()}" + ) + + # apply the causal_attention_mask first + if causal_attention_mask is not None: + if causal_attention_mask.size() != (bsz, 1, tgt_len, src_len): + raise ValueError( + f"Attention mask should be of size {(bsz, 1, tgt_len, src_len)}, but is" + f" {causal_attention_mask.size()}" + ) + attn_weights = attn_weights.view(bsz, self.num_heads, tgt_len, src_len) + causal_attention_mask + attn_weights = attn_weights.view(bsz * self.num_heads, tgt_len, src_len) + + if attention_mask is not None: + if attention_mask.size() != (bsz, 1, tgt_len, src_len): + raise ValueError( + f"Attention mask should be of size {(bsz, 1, tgt_len, src_len)}, but is {attention_mask.size()}" + ) + attn_weights = attn_weights.view(bsz, self.num_heads, tgt_len, src_len) + attention_mask + attn_weights = attn_weights.view(bsz * self.num_heads, tgt_len, src_len) + + attn_weights = torch.nn.functional.softmax(attn_weights, dim=-1) + + attn_probs = torch.nn.functional.dropout(attn_weights, p=0, training=False) + + attn_output = torch.bmm(attn_probs, value_states) + + if attn_output.size() != (bsz * self.num_heads, tgt_len, self.head_dim): + raise ValueError( + f"`attn_output` should be of size {(bsz, self.num_heads, tgt_len, self.head_dim)}, but is" + f" {attn_output.size()}" + ) + + attn_output = attn_output.view(bsz, self.num_heads, tgt_len, self.head_dim) + attn_output = attn_output.transpose(1, 2) + attn_output = attn_output.reshape(bsz, tgt_len, embed_dim) + + attn_output = self.out_proj(attn_output) + + return attn_output + + +class PhiVCLIPAttentionAndLayerNorm(torch.nn.Module): + def __init__(self): + super().__init__() + self.attn = PhiVCLIPAttention() + self.ln = torch.nn.LayerNorm(20, eps=1e-05) + + def forward(self, x): + # SkipLayerNorm ------+ + # | | + # Attention | + # | | + # MatMul | + # | | + # SkipLayerNorm ------+ + + # SkipLayerNorm + x = x + x + x = self.ln(x) + residual = x + + # Attention + MatMul + x = self.attn(x) + + # SkipLayerNorm + x = residual + x + x = self.ln(x) + return x + + +class TestFusion(unittest.TestCase): + def verify_fusion(self, optimized_model, expected_model_filename): + optimized_model.topological_sort(is_deterministic=True) + + expected_model_path = os.path.join(os.path.dirname(__file__), 
"test_data", "models", expected_model_filename) + expected_model = OnnxModel(onnx.load(expected_model_path)) + expected_model.topological_sort(is_deterministic=True) + + nodes = optimized_model.model.graph.node + self.assertEqual(len(nodes), len(expected_model.model.graph.node)) + + for i in range(len(nodes)): + self.assertEqual(nodes[i], expected_model.model.graph.node[i]) + + for expected_initializer in expected_model.model.graph.initializer: + self.assertTrue( + OnnxModel.has_same_value( + optimized_model.get_initializer(expected_initializer.name), expected_initializer + ) + ) + + def export(self, model, inputs): + torch.onnx.export( + model, + args=inputs, + f=os.path.join(os.path.dirname(__file__), "export.onnx"), + export_params=True, + opset_version=14, + do_constant_folding=True, + ) + + def tearDown(self): + path = os.path.join(os.path.dirname(__file__), "export.onnx") + if os.path.exists(path): + os.remove(path) + + def test_phi_vision_layernorm(self): + if not torch.cuda.is_available(): + return + model = PhiVCLIPLayerNorm() + inputs = (torch.randn(1, 2, 20).to(torch.float16),) + self.export(model, inputs) + original_model = onnx.load(os.path.join(os.path.dirname(__file__), "export.onnx")) + options = FusionOptions("clip") + optimized_model = optimize_model( + original_model, + model_type="clip", + num_heads=2, + hidden_size=20, + optimization_options=options, + opt_level=0, + use_gpu=True, + ) + self.verify_fusion(optimized_model, "phi-3.5-v-instruct-vision-layernorm.onnx") + + def test_phi_vision_quickgelu(self): + model = PhiVCLIPQuickGelu() + inputs = (torch.randn(1, 2, 20),) + self.export(model, inputs) + original_model = onnx.load(os.path.join(os.path.dirname(__file__), "export.onnx")) + options = FusionOptions("clip") + optimized_model = optimize_model( + original_model, model_type="clip", num_heads=2, hidden_size=20, optimization_options=options, opt_level=0 + ) + self.verify_fusion(optimized_model, "phi-3.5-v-instruct-vision-quickgelu.onnx") + + def test_phi_vision_attention(self): + model = PhiVCLIPAttentionAndLayerNorm() + inputs = (torch.randn(1, 2, 20),) + self.export(model, inputs) + original_model = onnx.load(os.path.join(os.path.dirname(__file__), "export.onnx")) + options = FusionOptions("clip") + optimized_model = optimize_model( + original_model, model_type="clip", num_heads=2, hidden_size=20, optimization_options=options, opt_level=0 + ) + self.verify_fusion(optimized_model, "phi-3.5-v-instruct-vision-attention.onnx") + + +if __name__ == "__main__": + unittest.main() diff --git a/onnxruntime/test/python/transformers/test_rotary_embedding_fusion.py b/onnxruntime/test/python/transformers/test_rotary_embedding_fusion.py index 7bca48c29019e..89ef0342fab74 100644 --- a/onnxruntime/test/python/transformers/test_rotary_embedding_fusion.py +++ b/onnxruntime/test/python/transformers/test_rotary_embedding_fusion.py @@ -6,7 +6,6 @@ import os import sys import unittest -from typing import List import numpy as np import onnx @@ -23,7 +22,7 @@ from onnxruntime.transformers.optimizer import optimize_model -def float_tensor(name: str, shape: List[int], random=False): +def float_tensor(name: str, shape: list[int], random=False): low = 0.0 high = 1.0 total_elements = 1 @@ -113,7 +112,7 @@ def create_inputs_and_outputs(self, model_type: str = ""): outputs.append(helper.make_tensor_value_info("past_seq_len_plus_zero", TensorProto.FLOAT, [1])) return inputs, outputs - def create_fused_model(self, interleaved: bool, initializers: List[TensorProto]): + def create_fused_model(self, 
interleaved: bool, initializers: list[TensorProto]): inputs, outputs = self.create_inputs_and_outputs() rope_node = helper.make_node( @@ -385,7 +384,7 @@ def create_apply_rope_path(self): return x_half_shape_nodes + rotate_half_nodes + x_embed_nodes - def create_test_model(self, model_type: str, use_redundant_squeeze_ops: bool, initializers: List[TensorProto]): + def create_test_model(self, model_type: str, use_redundant_squeeze_ops: bool, initializers: list[TensorProto]): apply_rope_nodes = self.create_apply_rope_path() cache_nodes = self.create_cache_path(model_type, use_redundant_squeeze_ops) inputs, outputs = self.create_inputs_and_outputs(model_type) diff --git a/onnxruntime/test/python/transformers/test_rotary_mha_fusion.py b/onnxruntime/test/python/transformers/test_rotary_mha_fusion.py index 373ad86ced1a7..0ec5c684532cc 100644 --- a/onnxruntime/test/python/transformers/test_rotary_mha_fusion.py +++ b/onnxruntime/test/python/transformers/test_rotary_mha_fusion.py @@ -6,7 +6,6 @@ import os import sys import unittest -from typing import List import numpy as np import onnx @@ -23,7 +22,7 @@ from onnxruntime.transformers.optimizer import optimize_model -def float_tensor(name: str, shape: List[int], random=False): +def float_tensor(name: str, shape: list[int], random=False): low = 0.0 high = 1.0 total_elements = 1 @@ -157,8 +156,8 @@ def create_rotary_embeddings( is_fused: bool, model_type: str, interleaved: bool, - inputs: List[TensorProto], - initializers: List[TensorProto], + inputs: list[TensorProto], + initializers: list[TensorProto], ): def get_first_rope_input(node_type: str): if is_fused or model_type == "llama2_msft": @@ -974,7 +973,7 @@ def create_qkv_path(self, model_type: str): return qkv_nodes + [transpose_qkv_node, reshape_qkv_2_node] # noqa: RUF005 - def create_concat_unsqueeze_paths(self, model_type: str, reshape_nodes: List[NodeProto]): + def create_concat_unsqueeze_paths(self, model_type: str, reshape_nodes: list[NodeProto]): # Create initial shape paths shape_0_node = helper.make_node( "Shape", @@ -1026,14 +1025,14 @@ def create_concat_unsqueeze_paths(self, model_type: str, reshape_nodes: List[Nod unsqueeze_0_node = helper.make_node( "Unsqueeze", inputs=[gather_0_node.output[0] if not use_mul_and_add_nodes_0 else "mul_extra_out", "zero"], - outputs=[f"unsqueeze_extra_{2*i}"], - name=f"Unsqueeze_extra_{2*i}", + outputs=[f"unsqueeze_extra_{2 * i}"], + name=f"Unsqueeze_extra_{2 * i}", ) unsqueeze_1_node = helper.make_node( "Unsqueeze", inputs=[gather_1_node.output[0] if not use_mul_and_add_nodes_1 else "add_extra_out", "zero"], - outputs=[f"unsqueeze_extra_{2*i + 1}"], - name=f"Unsqueeze_extra_{2*i + 1}", + outputs=[f"unsqueeze_extra_{2 * i + 1}"], + name=f"Unsqueeze_extra_{2 * i + 1}", ) reshape_name = reshape_node.name @@ -1097,7 +1096,7 @@ def create_end_nodes(self, model_type): ) return [matmul_o_node, end_node] - def create_fused_model(self, model_type: str, interleaved: bool, initializers: List[TensorProto]): + def create_fused_model(self, model_type: str, interleaved: bool, initializers: list[TensorProto]): inputs, outputs = self.create_inputs_and_outputs(model_type) matmul_nodes = self.create_matmul_nodes(True, model_type=model_type) rope_nodes = self.create_rotary_embeddings(True, model_type, interleaved, inputs, initializers) @@ -1134,7 +1133,7 @@ def create_fused_model(self, model_type: str, interleaved: bool, initializers: L model = helper.make_model(graph, opset_imports=[opset_import]) return model - def create_test_model(self, model_type: str, interleaved: 
bool, initializers: List[TensorProto]): + def create_test_model(self, model_type: str, interleaved: bool, initializers: list[TensorProto]): inputs, outputs = self.create_inputs_and_outputs(model_type) matmul_nodes = self.create_matmul_nodes(False, model_type) rope_nodes = self.create_rotary_embeddings(False, model_type, interleaved, inputs, initializers) diff --git a/onnxruntime/test/python/transformers/test_simplified_layernorm_fusion.py b/onnxruntime/test/python/transformers/test_simplified_layernorm_fusion.py index e86bdda7baffb..95639958dbb2e 100644 --- a/onnxruntime/test/python/transformers/test_simplified_layernorm_fusion.py +++ b/onnxruntime/test/python/transformers/test_simplified_layernorm_fusion.py @@ -5,7 +5,6 @@ import os import unittest -from typing import List import numpy as np import onnx @@ -22,7 +21,7 @@ from onnxruntime.transformers.optimizer import optimize_model -def float_tensor(name: str, shape: List[int], random=False): +def float_tensor(name: str, shape: list[int], random=False): low = 0.0 high = 1.0 total_elements = 1 @@ -115,7 +114,7 @@ def create_inputs_and_outputs(self, start_node_type: str): ] return inputs, outputs, start_node - def create_fused_model(self, start_node_type: str, initializers: List[TensorProto]): + def create_fused_model(self, start_node_type: str, initializers: list[TensorProto]): inputs, outputs, start_node = self.create_inputs_and_outputs(start_node_type) sln_node = helper.make_node( @@ -139,7 +138,7 @@ def create_fused_model(self, start_node_type: str, initializers: List[TensorProt return model # Notation follows https://onnx.ai/onnx/operators/onnx__LayerNormalization.html#summary - def create_test_model(self, start_node_type: str, first_parent_idx: int, initializers: List[TensorProto]): + def create_test_model(self, start_node_type: str, first_parent_idx: int, initializers: list[TensorProto]): end_node = helper.make_node( "Mul", inputs=["scale", "Normalized"] if first_parent_idx == 1 else ["Normalized", "scale"], @@ -197,7 +196,7 @@ def create_test_model(self, start_node_type: str, first_parent_idx: int, initial model = helper.make_model(graph, opset_imports=[opset_import]) return model - def check_models(self, start_node_type: str, first_parent_idx: int, initializers: List[TensorProto]): + def check_models(self, start_node_type: str, first_parent_idx: int, initializers: list[TensorProto]): expected_model_filename = "expected_model.onnx" expected_model = self.create_fused_model(start_node_type, initializers) onnx.save(expected_model, expected_model_filename) diff --git a/onnxruntime/test/python/transformers/test_skip_layer_norm_fusion.py b/onnxruntime/test/python/transformers/test_skip_layer_norm_fusion.py index 5b3a3f18cd744..a55ff5aa91519 100644 --- a/onnxruntime/test/python/transformers/test_skip_layer_norm_fusion.py +++ b/onnxruntime/test/python/transformers/test_skip_layer_norm_fusion.py @@ -6,7 +6,6 @@ import os import unittest -from typing import Dict, List import numpy as np import onnx @@ -21,7 +20,7 @@ from onnxruntime.transformers.optimizer import optimize_model -def float_tensor(name: str, shape: List[int], random=False): +def float_tensor(name: str, shape: list[int], random=False): low = 0.0 high = 1.0 total_elements = 1 @@ -35,9 +34,9 @@ class TestFusion(unittest.TestCase): def verify_skip_layer_norm_fusion( self, model_path: str, - expected_counter: Dict[str, int], - expected_inputs: List[str], - expected_outputs: List[str], + expected_counter: dict[str, int], + expected_inputs: list[str], + expected_outputs: list[str], ): 
options = FusionOptions("bert") optimized_model = optimize_model(model_path, optimization_options=options, opt_level=0) diff --git a/onnxruntime/test/python/transformers/test_sparse_attention.py b/onnxruntime/test/python/transformers/test_sparse_attention.py index 5dbb9a277e45a..eac6bbdc3dd12 100644 --- a/onnxruntime/test/python/transformers/test_sparse_attention.py +++ b/onnxruntime/test/python/transformers/test_sparse_attention.py @@ -6,9 +6,9 @@ """ Parity test and benchmark performance of SparseAttention. Requires Nvidia GPU of Compute Capability 7.5 or above. """ + import math import unittest -from typing import Optional, Union import torch from benchmark_mha import InputFormats @@ -33,7 +33,7 @@ def __init__( num_heads: int, kv_num_heads: int, head_size: int, - softmax_scale: Optional[float], + softmax_scale: float | None, do_rotary: bool, rotary_interleaved: bool, provider: str = "CUDAExecutionProvider", @@ -601,8 +601,8 @@ def group_query_attention_reference( key: Tensor, value: Tensor, config: GroupQueryAttentionConfig, - scale: Optional[float] = None, - mask: Optional[Tensor] = None, + scale: float | None = None, + mask: Tensor | None = None, ): if scale is None: scale = 1.0 / (config.head_size**0.5) @@ -703,7 +703,7 @@ def infer(self): def create_ort_session( - config: Union[SparseAttentionConfig, GroupQueryAttentionConfig], session_options=None, enable_cuda_graph=False + config: SparseAttentionConfig | GroupQueryAttentionConfig, session_options=None, enable_cuda_graph=False ) -> CudaSession: if isinstance(config, SparseAttentionConfig): onnx_model_str = create_sparse_attention_onnx_model(config) diff --git a/onnxruntime/test/python/transformers/whisper_model_generator.py b/onnxruntime/test/python/transformers/whisper_model_generator.py index a57b45cbc5ea3..f1a692b7694cb 100644 --- a/onnxruntime/test/python/transformers/whisper_model_generator.py +++ b/onnxruntime/test/python/transformers/whisper_model_generator.py @@ -4,7 +4,6 @@ # license information. 
# -------------------------------------------------------------------------- -from typing import List import numpy as np import onnx @@ -13,7 +12,7 @@ # Adapted from bert_model_generator.py -def get_tensor_and_weight(name: str, shape: List[int], random=False, zeros=False): +def get_tensor_and_weight(name: str, shape: list[int], random=False, zeros=False): low = 0.0 high = 1.0 total_elements = 1 @@ -22,7+21,9 @@ def get_tensor_and_weight(name: str, shape: List[int], random=False, zeros=False weights = ( [np.random.uniform(low, high) for _ in range(total_elements)] if random - else [0.0] * total_elements if zeros else [1.0] * total_elements + else [0.0] * total_elements + if zeros + else [1.0] * total_elements ) return helper.make_tensor(name, TensorProto.FLOAT, shape, weights), weights diff --git a/onnxruntime/test/qnn_ctx_gen/main.cc b/onnxruntime/test/qnn_ctx_gen/main.cc index 3be0bd253c8a4..bb5007b40b072 100644 --- a/onnxruntime/test/qnn_ctx_gen/main.cc +++ b/onnxruntime/test/qnn_ctx_gen/main.cc @@ -16,7 +16,6 @@ #include "core/common/logging/sinks/clog_sink.h" #include "core/graph/model.h" -#include "core/providers/shared/utils/utils.h" #include "core/session/environment.h" #include "core/common/logging/logging.h" @@ -31,6 +30,24 @@ static void CheckStatus(const Status& status) { } } +static int64_t GetNodeAttr(const Node& node, const std::string& attr_name, int64_t default_val) { + const auto& attributes = node.GetAttributes(); + if (auto entry = attributes.find(attr_name); entry != attributes.end()) { + return entry->second.i(); + } + + return default_val; +} + +static const std::string& GetNodeAttr(const Node& node, const std::string& attr_name, const std::string& default_val) { + const auto& attributes = node.GetAttributes(); + if (auto entry = attributes.find(attr_name); entry != attributes.end()) { + return entry->second.s(); + } + + return default_val; +} + // from the last context cache Onnx model, find the EPContext node with main_context=1, // and get the QNN context binary file name, this context binary contains all graphs from all Onnx models // get the max spill fill buffer size @@ -44,11 +61,10 @@ static void GetLastContextBinaryFileName(const std::basic_string<ORTCHAR_T> last auto& ctx_graph = ctx_model->MainGraph(); for (auto& node : ctx_graph.Nodes()) { if (node.OpType() == "EPContext") { - NodeAttrHelper node_helper(node); - int64_t is_main_context = node_helper.Get("main_context", static_cast<int64_t>(0)); - max_size = node_helper.Get("max_size", static_cast<int64_t>(0)); + int64_t is_main_context = GetNodeAttr(node, "main_context", static_cast<int64_t>(0)); + max_size = GetNodeAttr(node, "max_size", static_cast<int64_t>(0)); if (1 == is_main_context) { - last_ctx_bin_file = node_helper.Get("ep_cache_context", ""); + last_ctx_bin_file = GetNodeAttr(node, "ep_cache_context", ""); return; } } @@ -72,10 +88,9 @@ static void UpdateEpContextModel(const std::vector<std::basic_string<ORTCHAR_T>> for (auto& node : ctx_graph.Nodes()) { if (node.OpType() == "EPContext") { - NodeAttrHelper node_helper(node); - int64_t is_main_context = node_helper.Get("main_context", static_cast<int64_t>(0)); + int64_t is_main_context = GetNodeAttr(node, "main_context", static_cast<int64_t>(0)); if (1 == is_main_context) { - std::string old_qnn_ctx_binary_file_name = node_helper.Get("ep_cache_context", ""); + std::string old_qnn_ctx_binary_file_name = GetNodeAttr(node, "ep_cache_context", ""); auto file_path = path.replace_filename(old_qnn_ctx_binary_file_name); std::remove(file_path.string().c_str()); node.ClearAttribute("ep_cache_context"); diff --git
a/onnxruntime/test/shared_lib/test_inference.cc b/onnxruntime/test/shared_lib/test_inference.cc index e8c8c8db8d08f..2ea99151c2bfd 100644 --- a/onnxruntime/test/shared_lib/test_inference.cc +++ b/onnxruntime/test/shared_lib/test_inference.cc @@ -1934,6 +1934,59 @@ TEST(ReducedOpsBuildTest, test_excluded_ops) { } #endif +#if defined(USE_QNN) + +// Returns true if QNN EP was created and QNN HTP shared memory allocator is available, false otherwise. +static bool CreateSessionWithQnnEpAndQnnHtpSharedMemoryAllocator(PATH_TYPE model_path, Ort::Session& session) { +#if defined(__aarch64__) || defined(_M_ARM64) || defined(__linux__) + constexpr bool use_htp_backend = true; +#else + constexpr bool use_htp_backend = false; +#endif + +#if defined(_WIN32) + const char* backend_path = use_htp_backend ? "QnnHtp.dll" : "QnnCpu.dll"; +#else + const char* backend_path = use_htp_backend ? "libQnnHtp.so" : "libQnnCpu.so"; +#endif + + Ort::SessionOptions session_options; + session_options.AppendExecutionProvider("QNN", + {{"enable_htp_shared_memory_allocator", "1"}, + {"backend_path", backend_path}}); + + try { + session = Ort::Session{*ort_env, model_path, session_options}; + return true; + } catch (const Ort::Exception& e) { + // handle particular exception that indicates that the libcdsprpc.so / dll can't be loaded + // NOTE: To run this on a local Windows ARM64 device, you need to copy libcdsprpc.dll to the build directory: + // - Open File Explorer + // - Go to C:/Windows/System32/DriverStore/FileRepository/ + // - Search for a folder that begins with qcnspmcdm8380.inf_arm64_ and open it + // - Copy the libcdsprpc.dll into the build/[PATH CONTAINING onnxruntime.dll] directory of the application. + // TODO(adrianlizarraga): Update CMake build for unittests to automatically copy libcdsprpc.dll into build directory + std::string_view error_message = e.what(); + +#if defined(_WIN32) + std::string_view expected_error_message = "Failed to load libcdsprpc.dll"; +#else + std::string_view expected_error_message = "Failed to load libcdsprpc.so"; +#endif + + if (e.GetOrtErrorCode() == ORT_FAIL && + error_message.find(expected_error_message) != std::string_view::npos) { + session = Ort::Session{nullptr}; + return false; + } + + // propagate other exceptions + throw; + } +} + +#endif // defined(USE_QNN) + TEST(CApiTest, get_allocator_cpu) { Ort::SessionOptions session_options; Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_CPU(session_options, 1)); @@ -2001,6 +2054,32 @@ TEST(CApiTest, get_allocator_rocm) { } #endif +#if defined(USE_QNN) + +TEST(CApiTest, get_allocator_qnn_htp_shared) { + Ort::Session session{nullptr}; + + if (!CreateSessionWithQnnEpAndQnnHtpSharedMemoryAllocator(NAMED_AND_ANON_DIM_PARAM_URI, session)) { + GTEST_SKIP() << "HTP shared memory allocator is unavailable."; + } + + Ort::MemoryInfo info_qnn_htp_shared("QnnHtpShared", OrtAllocatorType::OrtDeviceAllocator, 0, OrtMemTypeDefault); + Ort::Allocator qnn_htp_shared_allocator(session, info_qnn_htp_shared); + + auto allocator_info = qnn_htp_shared_allocator.GetInfo(); + ASSERT_EQ(allocator_info, info_qnn_htp_shared); + + void* p = qnn_htp_shared_allocator.Alloc(1024); + ASSERT_NE(p, nullptr); + qnn_htp_shared_allocator.Free(p); + + auto mem_allocation = qnn_htp_shared_allocator.GetAllocation(1024); + ASSERT_NE(mem_allocation.get(), nullptr); + ASSERT_EQ(mem_allocation.size(), size_t{1024}); +} + +#endif // defined(USE_QNN) + TEST(CApiTest, io_binding) { Ort::SessionOptions session_options; 
Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_CPU(session_options, 1)); @@ -2178,6 +2257,104 @@ TEST(CApiTest, io_binding_cuda) { } #endif +#if defined(USE_QNN) + +TEST(CApiTest, io_binding_qnn_htp_shared) { + Ort::Session session{nullptr}; + if (!CreateSessionWithQnnEpAndQnnHtpSharedMemoryAllocator(MODEL_URI, session)) { + GTEST_SKIP() << "HTP shared memory allocator is unavailable."; + } + + Ort::MemoryInfo info_qnn_htp_shared("QnnHtpShared", OrtAllocatorType::OrtDeviceAllocator, 0, OrtMemTypeDefault); + + Ort::Allocator qnn_htp_shared_allocator(session, info_qnn_htp_shared); + auto allocator_info = qnn_htp_shared_allocator.GetInfo(); + ASSERT_EQ(info_qnn_htp_shared, allocator_info); + + const std::array<int64_t, 2> x_shape = {3, 2}; + std::array<float, 6> x_values = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f}; + auto input_data = qnn_htp_shared_allocator.GetAllocation(x_values.size() * sizeof(float)); + ASSERT_NE(input_data.get(), nullptr); + memcpy(input_data.get(), x_values.data(), sizeof(float) * x_values.size()); + + // Create an OrtValue tensor backed by data on QNN HTP shared memory + Ort::Value bound_x = Ort::Value::CreateTensor(info_qnn_htp_shared, reinterpret_cast<float*>(input_data.get()), x_values.size(), + x_shape.data(), x_shape.size()); + + // Set up the expected output (y) from the model. Note that QNN EP runs float32 operators as float16, + // so the output will not be exactly equal. + const std::array<int64_t, 2> expected_y_shape = {3, 2}; + const std::array<float, 6> expected_y = {1.0f, 4.0f, 9.0f, 16.0f, 25.0f, 36.0f}; + constexpr float y_max_abs_err = 1e-5f; + auto output_data = qnn_htp_shared_allocator.GetAllocation(expected_y.size() * sizeof(float)); + ASSERT_NE(output_data.get(), nullptr); + + // Create an OrtValue tensor backed by data on QNN HTP shared memory + Ort::Value bound_y = Ort::Value::CreateTensor(info_qnn_htp_shared, reinterpret_cast<float*>(output_data.get()), + expected_y.size(), expected_y_shape.data(), expected_y_shape.size()); + + Ort::IoBinding binding(session); + binding.BindInput("X", bound_x); + binding.BindOutput("Y", bound_y); + + session.Run(Ort::RunOptions(), binding); + + // Check the values against the bound raw memory + { + gsl::span<const float> y{reinterpret_cast<const float*>(output_data.get()), expected_y.size()}; + EXPECT_THAT(expected_y, ::testing::Pointwise(::testing::FloatNear(y_max_abs_err), y)); + } + + // Now compare values via GetOutputValues + { + std::vector<Ort::Value> output_values = binding.GetOutputValues(); + ASSERT_EQ(output_values.size(), 1U); + const Ort::Value& Y_value = output_values[0]; + ASSERT_TRUE(Y_value.IsTensor()); + Ort::TensorTypeAndShapeInfo type_info = Y_value.GetTensorTypeAndShapeInfo(); + ASSERT_EQ(ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT, type_info.GetElementType()); + auto count = type_info.GetElementCount(); + ASSERT_EQ(expected_y.size(), count); + + gsl::span<const float> y{Y_value.GetTensorData<float>(), count}; + EXPECT_THAT(expected_y, ::testing::Pointwise(::testing::FloatNear(y_max_abs_err), y)); + } + + { + std::vector<std::string> output_names = binding.GetOutputNames(); + ASSERT_EQ(1U, output_names.size()); + ASSERT_EQ(output_names[0].compare("Y"), 0); + } + + // Now replace the binding of Y with an on-device binding instead of pre-allocated memory. + // This is used when we cannot allocate an OrtValue up front because the output dimensions are unknown. + { + binding.BindOutput("Y", info_qnn_htp_shared); + session.Run(Ort::RunOptions(), binding); + } + + // Check the output value allocated based on the device binding.
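+ // GetOutputValues() returns the tensor that ORT allocated through the bound MemoryInfo, + // so its element type and count are checked before comparing against expected_y.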
+ { + std::vector<Ort::Value> output_values = binding.GetOutputValues(); + ASSERT_EQ(output_values.size(), 1U); + const Ort::Value& Y_value = output_values[0]; + ASSERT_TRUE(Y_value.IsTensor()); + Ort::TensorTypeAndShapeInfo type_info = Y_value.GetTensorTypeAndShapeInfo(); + ASSERT_EQ(ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT, type_info.GetElementType()); + auto count = type_info.GetElementCount(); + ASSERT_EQ(expected_y.size(), count); + + gsl::span<const float> y{Y_value.GetTensorData<float>(), count}; + EXPECT_THAT(expected_y, ::testing::Pointwise(::testing::FloatNear(y_max_abs_err), y)); + } + + // Clean up + binding.ClearBoundInputs(); + binding.ClearBoundOutputs(); +} + +#endif // defined(USE_QNN) + #if defined(USE_CUDA) || defined(USE_TENSORRT) || defined(USE_ROCM) || defined(USE_DML) TEST(CApiTest, basic_cuda_graph) { const auto& api = Ort::GetApi(); diff --git a/onnxruntime/test/testdata/CNTK/gen.py b/onnxruntime/test/testdata/CNTK/gen.py index 5a3ca461f471a..b5f39bcb448f9 100644 --- a/onnxruntime/test/testdata/CNTK/gen.py +++ b/onnxruntime/test/testdata/CNTK/gen.py @@ -23,7 +23,7 @@ def SaveTensorProto(file_path, variable, data, name): # noqa: N802 def SaveData(test_data_dir, prefix, variables, data_list, name_replacements=None): # noqa: N802 if isinstance(data_list, np.ndarray): data_list = [data_list] - for (i, d), v in zip(enumerate(data_list), variables): + for (i, d), v in zip(enumerate(data_list), variables, strict=False): SaveTensorProto( os.path.join(test_data_dir, f"{prefix}_{i}.pb"), v, diff --git a/onnxruntime/test/testdata/custom_op_local_function/custom_op_test_local_function.py b/onnxruntime/test/testdata/custom_op_local_function/custom_op_test_local_function.py index 3e353d4142554..7916d93c3e531 100644 --- a/onnxruntime/test/testdata/custom_op_local_function/custom_op_test_local_function.py +++ b/onnxruntime/test/testdata/custom_op_local_function/custom_op_test_local_function.py @@ -39,7 +39,7 @@ def test_basic_all(self): ) x = np.arange(2**2).reshape((2,) * 2).astype(np.float32) t = np.arange(8).reshape((2, 4)).astype(np.float32) - got = sess.run(None, dict(X=x))[0] + got = sess.run(None, {"X": x})[0] np.testing.assert_allclose(t, got, atol=1e-5) diff --git a/onnxruntime/test/testdata/dummy_t5_model_generator.py b/onnxruntime/test/testdata/dummy_t5_model_generator.py index 1ecd8b9ee9c92..00d9231fc85ce 100644 --- a/onnxruntime/test/testdata/dummy_t5_model_generator.py +++ b/onnxruntime/test/testdata/dummy_t5_model_generator.py @@ -1,4 +1,4 @@ -""" Script to generate a dummy ONNX model emulating T5 model with BeamSearch op.
""" +"""Script to generate a dummy ONNX model emulating T5 model with BeamSearch op.""" import argparse diff --git a/onnxruntime/test/testdata/dynamic_quantize_matmul_test.py b/onnxruntime/test/testdata/dynamic_quantize_matmul_test.py index 8e6dbe5ea581d..594da08abb1fb 100644 --- a/onnxruntime/test/testdata/dynamic_quantize_matmul_test.py +++ b/onnxruntime/test/testdata/dynamic_quantize_matmul_test.py @@ -1,5 +1,3 @@ -from enum import Enum # noqa: F401 - import onnx from onnx import TensorProto, helper diff --git a/onnxruntime/test/testdata/ep_partitioning_tests.py b/onnxruntime/test/testdata/ep_partitioning_tests.py index 6c8322bb9bd62..367cafb795bad 100644 --- a/onnxruntime/test/testdata/ep_partitioning_tests.py +++ b/onnxruntime/test/testdata/ep_partitioning_tests.py @@ -1,4 +1,3 @@ -import numpy as np # noqa: F401 import onnx from onnx import TensorProto, helper diff --git a/onnxruntime/test/testdata/matmul_integer_to_float.py b/onnxruntime/test/testdata/matmul_integer_to_float.py index e6c51009018f9..0c1ea47fff5b1 100644 --- a/onnxruntime/test/testdata/matmul_integer_to_float.py +++ b/onnxruntime/test/testdata/matmul_integer_to_float.py @@ -1,5 +1,3 @@ -from enum import Enum # noqa: F401 - import onnx from onnx import TensorProto, helper diff --git a/onnxruntime/test/testdata/onnx_backend_test_series_filters.jsonc b/onnxruntime/test/testdata/onnx_backend_test_series_filters.jsonc index 0540fb3912e81..272ea37fcc70c 100644 --- a/onnxruntime/test/testdata/onnx_backend_test_series_filters.jsonc +++ b/onnxruntime/test/testdata/onnx_backend_test_series_filters.jsonc @@ -323,46 +323,7 @@ "^test_dequantizelinear_int4", "^test_dequantizelinear_uint4", "^test_quantizelinear_int4", - "^test_quantizelinear_uint4", - // onnx 1.17.0 op tests: skip until implemented in ORT - "^test_acos*", // Could not find an implementation for Acos(22) - "^test_acosh*", // Could not find an implementation for Acosh(22) - "^test_asin*", // Could not find an implementation for Asin(22) - "^test_asinh*", // Could not find an implementation for Asinh(22) - "^test_atan*", // Could not find an implementation for Atan(22) - "^test_atanh*", // Could not find an implementation for Atanh(22) - "^test_basic_conv_with_padding*", // Could not find an implementation for Conv(22) - "^test_basic_conv_without_padding*", // Could not find an implementation for Conv(22) - "^test_conv*", // Could not find an implementation for Conv(22) - "^test_convtranspose*", // Could not find an implementation for ConvTranspose(22) - "^test_cos*", // Could not find an implementation for Cos(22) - "^test_cosh*", // Could not find an implementation for Cosh(22) - "^test_det*", // Could not find an implementation for Det(22) - "^test_dropout*", // Could not find an implementation for Dropout(22) - "^test_elu*", // Could not find an implementation for Elu(22) - "^test_eyelike*", // Could not find an implementation for EyeLike(22) - "^test_globalaveragepool*", // Could not find an implementation for GlobalAveragePool(22) - "^test_globalmaxpool*", // Could not find an implementation for GlobalMaxPool(22) - "^test_gridsample*", // Could not find an implementation for GridSample(22) - "^test_gru*", // Could not find an implementation for GRU(22) - "^test_hardsigmoid*", // Could not find an implementation for HardSigmoid(22) - "^test_hardswish*", // Could not find an implementation for HardSigmoid(22) - "^test_instancenorm*", // Could not find an implementation for InstanceNormalization(22) - "^test_lppool*", // Could not find an implementation for 
LpPool(22) - "^test_lstm*", // Could not find an implementation for LSTM(22) - "^test_maxpool*", // Could not find an implementation for MaxPool(22) - "^test_maxunpool*", // Could not find an implementation for MaxUnpool(22) - "^test_mish*", // Could not find an implementation for Softplus(22) - "^test_rnn*", // Could not find an implementation for RNN(22) - "^test_round*", // Could not find an implementation for Round(22) - "^test_selu*", // Could not find an implementation for Selu(22) - "^test_simple_rnn*", // Could not find an implementation for RNN(22) - "^test_sin*", // Could not find an implementation for Sin(22) - "^test_sinh*", // Could not find an implementation for Sinh(22) - "^test_softplus*", // Could not find an implementation for Softplus(22) - "^test_softsign*", // Could not find an implementation for Softsign(22) - "^test_tan*", // Could not find an implementation for Tan(22) - "^test_thresholdedrelu*" // Could not find an implementation for ThresholdedRelu(22) + "^test_quantizelinear_uint4" ], "current_failing_tests_x86": [ "^test_vgg19", @@ -739,7 +700,9 @@ "^test_layer_normalization_default_axis_cpu", "^test_gelu_tanh_1_expanded_cpu", "^test_gelu_tanh_2_expanded_cpu", - "^test_dynamicquantizelinear_expanded_cpu" + "^test_dynamicquantizelinear_expanded_cpu", + "^test_center_crop_pad_crop_negative_axes_hwc*", // failed due to new types or shape infer with negative axis for CenterCropPad. + "^test_center_crop_pad_crop_negative_axes_hwc_expanded*" // failed due to new types or shape infer with negative axis for CenterCropPad. ], "current_failing_tests_pure_DML": [ "^test_negative_log_likelihood_loss_input_shape_is_NCd1d2d3_none_no_weight_negative_ignore_index_cpu", diff --git a/onnxruntime/test/testdata/sparse_initializer_as_output.py b/onnxruntime/test/testdata/sparse_initializer_as_output.py index 1f85f5690dc0e..25d66b40a7c73 100644 --- a/onnxruntime/test/testdata/sparse_initializer_as_output.py +++ b/onnxruntime/test/testdata/sparse_initializer_as_output.py @@ -1,18 +1,14 @@ import argparse -import os # noqa: F401 import sys import traceback -from typing import Any, Callable, Dict, List, Optional, Sequence, Text, Tuple, TypeVar, Union, cast # noqa: F401 import numpy as np import onnx -from onnx import AttributeProto # noqa: F401 -from onnx import GraphProto # noqa: F401 -from onnx import SparseTensorProto # noqa: F401 -from onnx import mapping # noqa: F401 -from onnx import numpy_helper # noqa: F401 -from onnx import utils # noqa: F401 -from onnx import TensorProto, ValueInfoProto, helper +from onnx import ( + TensorProto, + ValueInfoProto, + helper, +) from onnx.helper import make_opsetid diff --git a/onnxruntime/test/testdata/sparse_to_dense_matmul.py b/onnxruntime/test/testdata/sparse_to_dense_matmul.py index ceabae9c2d3f6..5a8a00cc7748e 100644 --- a/onnxruntime/test/testdata/sparse_to_dense_matmul.py +++ b/onnxruntime/test/testdata/sparse_to_dense_matmul.py @@ -1,18 +1,13 @@ import argparse -import os # noqa: F401 import sys import traceback -from typing import Any, Callable, Dict, List, Optional, Sequence, Text, Tuple, TypeVar, Union, cast # noqa: F401 -import numpy as np # noqa: F401 import onnx -from onnx import AttributeProto # noqa: F401 -from onnx import GraphProto # noqa: F401 -from onnx import SparseTensorProto # noqa: F401 -from onnx import mapping # noqa: F401 -from onnx import numpy_helper # noqa: F401 -from onnx import utils # noqa: F401 -from onnx import TensorProto, ValueInfoProto, helper +from onnx import ( + TensorProto, + ValueInfoProto, + helper, +) 
from onnx.helper import make_opsetid diff --git a/onnxruntime/test/testdata/test_data_generation/adamw_test/adamw_test_data_generator.py b/onnxruntime/test/testdata/test_data_generation/adamw_test/adamw_test_data_generator.py index 443444044bb8d..430a9a345e333 100644 --- a/onnxruntime/test/testdata/test_data_generation/adamw_test/adamw_test_data_generator.py +++ b/onnxruntime/test/testdata/test_data_generation/adamw_test/adamw_test_data_generator.py @@ -2,7 +2,7 @@ # Licensed under the MIT License. """This file is used to generate test data for Adam optimizer tests in - orttraining/orttraining/test/training_ops/cuda/optimizer/adamw_test.cc.""" +orttraining/orttraining/test/training_ops/cuda/optimizer/adamw_test.cc.""" import torch diff --git a/onnxruntime/test/testdata/test_data_generation/lr_scheduler/lr_scheduler_test_data_generator.py b/onnxruntime/test/testdata/test_data_generation/lr_scheduler/lr_scheduler_test_data_generator.py index 95dfd7fa36bd6..e4ecae4b18fe6 100644 --- a/onnxruntime/test/testdata/test_data_generation/lr_scheduler/lr_scheduler_test_data_generator.py +++ b/onnxruntime/test/testdata/test_data_generation/lr_scheduler/lr_scheduler_test_data_generator.py @@ -2,7 +2,7 @@ # Licensed under the MIT License. """This file is used to generate test data for LR scheduler optimizer tests in - orttraining/orttraining/test/training_api/core/training_api_tests.cc.""" +orttraining/orttraining/test/training_api/core/training_api_tests.cc.""" import torch from torch.optim.lr_scheduler import LambdaLR @@ -33,7 +33,7 @@ def __init__(self, optimizer, warmup_steps, t_total, last_epoch=-1): super().__init__(optimizer, self.lr_lambda, last_epoch=last_epoch) def lr_lambda(self, step): - print(f"warmup_step_count_: {self.warmup_steps }, step: {step}, total_step_count_: {self.t_total}") + print(f"warmup_step_count_: {self.warmup_steps}, step: {step}, total_step_count_: {self.t_total}") if step < self.warmup_steps: return float(step) / float(max(1, self.warmup_steps)) return max(0.0, float(self.t_total - step) / float(max(1.0, self.t_total - self.warmup_steps))) @@ -60,7 +60,7 @@ def main(): import tempfile - fp = tempfile.NamedTemporaryFile() + fp = tempfile.NamedTemporaryFile() # noqa: SIM115 adamw_optimizer = torch.optim.AdamW(pt_model.parameters(), lr=1e-3) scheduler = WarmupLinearSchedule(adamw_optimizer, num_warmup_steps, num_training_steps) diff --git a/onnxruntime/test/testdata/test_data_generation/sgd_test/sgd_test_data_generator.py b/onnxruntime/test/testdata/test_data_generation/sgd_test/sgd_test_data_generator.py index 173225a21a52f..e601385dc8ad4 100644 --- a/onnxruntime/test/testdata/test_data_generation/sgd_test/sgd_test_data_generator.py +++ b/onnxruntime/test/testdata/test_data_generation/sgd_test/sgd_test_data_generator.py @@ -2,7 +2,7 @@ # Licensed under the MIT License. """This file is used to generate test data for SGD optimizer tests in - orttraining/orttraining/test/training_ops/cuda/optimizer/sgd_test.cc.""" +orttraining/orttraining/test/training_ops/cuda/optimizer/sgd_test.cc.""" import torch diff --git a/onnxruntime/test/testdata/training_api/ort_format/prepare_artifacts.py b/onnxruntime/test/testdata/training_api/ort_format/prepare_artifacts.py index 70e8c4ac011a9..b2ad2463aa8fa 100644 --- a/onnxruntime/test/testdata/training_api/ort_format/prepare_artifacts.py +++ b/onnxruntime/test/testdata/training_api/ort_format/prepare_artifacts.py @@ -2,7 +2,7 @@ # Licensed under the MIT License. 
"""This file is used to generate test data for ort format model tests in - orttraining/orttraining/test/training_api/core/training_capi_tests.cc.""" +orttraining/orttraining/test/training_api/core/training_capi_tests.cc.""" import onnx import torch diff --git a/onnxruntime/test/testdata/transform/computation_reduction.py b/onnxruntime/test/testdata/transform/computation_reduction.py index 6f726a54261ed..af0a39636f9ee 100644 --- a/onnxruntime/test/testdata/transform/computation_reduction.py +++ b/onnxruntime/test/testdata/transform/computation_reduction.py @@ -1,6 +1,6 @@ import numpy as np import onnx -from onnx import AttributeProto, GraphProto, OperatorSetIdProto, TensorProto, helper, numpy_helper # noqa: F401 +from onnx import OperatorSetIdProto, TensorProto, helper, numpy_helper vocab_size = 256 # 30258 diff --git a/onnxruntime/test/testdata/transform/computation_reduction/gather/gather_reshape.py b/onnxruntime/test/testdata/transform/computation_reduction/gather/gather_reshape.py index 5b3d841e3fade..ce46d60a8a33c 100755 --- a/onnxruntime/test/testdata/transform/computation_reduction/gather/gather_reshape.py +++ b/onnxruntime/test/testdata/transform/computation_reduction/gather/gather_reshape.py @@ -82,7 +82,7 @@ def _create_model_proto( ["batch_size", 31, 16, 64], 1, [31], - [i for i in range(31)], + list(range(31)), [4], [0, 128, 16, 64], "gather_reshape_seqlen_dim2", diff --git a/onnxruntime/test/testdata/transform/computation_reduction/gathernd/gathernd_add.py b/onnxruntime/test/testdata/transform/computation_reduction/gathernd/gathernd_add.py index cd823ce8391c2..7caf7045ccb93 100755 --- a/onnxruntime/test/testdata/transform/computation_reduction/gathernd/gathernd_add.py +++ b/onnxruntime/test/testdata/transform/computation_reduction/gathernd/gathernd_add.py @@ -1,6 +1,6 @@ import numpy as np import onnx -from onnx import AttributeProto, GraphProto, OperatorSetIdProto, TensorProto, helper, numpy_helper # noqa: F401 +from onnx import OperatorSetIdProto, TensorProto, helper, numpy_helper X = helper.make_tensor_value_info("input", TensorProto.FLOAT, ["batch", "seqlen", 128]) unsqueezed_masked_lm_positions = helper.make_tensor_value_info( diff --git a/onnxruntime/test/testdata/transform/computation_reduction/gathernd/gathernd_div.py b/onnxruntime/test/testdata/transform/computation_reduction/gathernd/gathernd_div.py index ee25bef5c1161..86413b8679a56 100755 --- a/onnxruntime/test/testdata/transform/computation_reduction/gathernd/gathernd_div.py +++ b/onnxruntime/test/testdata/transform/computation_reduction/gathernd/gathernd_div.py @@ -1,6 +1,6 @@ import numpy as np import onnx -from onnx import AttributeProto, GraphProto, OperatorSetIdProto, TensorProto, helper, numpy_helper # noqa: F401 +from onnx import OperatorSetIdProto, TensorProto, helper, numpy_helper X = helper.make_tensor_value_info("input", TensorProto.FLOAT, ["batch", "seqlen", 128]) unsqueezed_masked_lm_positions = helper.make_tensor_value_info( diff --git a/onnxruntime/test/testdata/transform/computation_reduction/gathernd/gathernd_layernormalization.py b/onnxruntime/test/testdata/transform/computation_reduction/gathernd/gathernd_layernormalization.py index dc2abf1dda586..ffaf62a243359 100755 --- a/onnxruntime/test/testdata/transform/computation_reduction/gathernd/gathernd_layernormalization.py +++ b/onnxruntime/test/testdata/transform/computation_reduction/gathernd/gathernd_layernormalization.py @@ -1,6 +1,6 @@ import numpy as np import onnx -from onnx import AttributeProto, GraphProto, OperatorSetIdProto, TensorProto, 
helper, numpy_helper # noqa: F401 +from onnx import OperatorSetIdProto, TensorProto, helper, numpy_helper X = helper.make_tensor_value_info("input", TensorProto.FLOAT, ["batch", "seqlen", 128]) unsqueezed_masked_lm_positions = helper.make_tensor_value_info( diff --git a/onnxruntime/test/testdata/transform/computation_reduction/gathernd/gathernd_matmul.py b/onnxruntime/test/testdata/transform/computation_reduction/gathernd/gathernd_matmul.py index bc850c4031741..65767a8986746 100755 --- a/onnxruntime/test/testdata/transform/computation_reduction/gathernd/gathernd_matmul.py +++ b/onnxruntime/test/testdata/transform/computation_reduction/gathernd/gathernd_matmul.py @@ -1,6 +1,6 @@ import numpy as np import onnx -from onnx import AttributeProto, GraphProto, OperatorSetIdProto, TensorProto, helper, numpy_helper # noqa: F401 +from onnx import OperatorSetIdProto, TensorProto, helper, numpy_helper X = helper.make_tensor_value_info("input", TensorProto.FLOAT, ["batch", "seqlen", 128]) unsqueezed_masked_lm_positions = helper.make_tensor_value_info( diff --git a/onnxruntime/test/testdata/transform/concat_slice_elimination.py b/onnxruntime/test/testdata/transform/concat_slice_elimination.py index 9eade63328aec..97f0c6f243f60 100644 --- a/onnxruntime/test/testdata/transform/concat_slice_elimination.py +++ b/onnxruntime/test/testdata/transform/concat_slice_elimination.py @@ -1,8 +1,6 @@ -import random # noqa: F401 - import numpy as np import onnx -from onnx import GraphProto, OperatorSetIdProto, TensorProto, helper, numpy_helper # noqa: F401 +from onnx import OperatorSetIdProto, TensorProto, helper, numpy_helper batch = 3 hidden_size = 4 diff --git a/onnxruntime/test/testdata/transform/convert_qdq_ops_to_ms_domain.py b/onnxruntime/test/testdata/transform/convert_qdq_ops_to_ms_domain.py index e7fd4ac70f065..1dd4ae0aee3e0 100644 --- a/onnxruntime/test/testdata/transform/convert_qdq_ops_to_ms_domain.py +++ b/onnxruntime/test/testdata/transform/convert_qdq_ops_to_ms_domain.py @@ -24,6 +24,7 @@ - fusion/constant_folding_qdq_node_unit.graph_output.qdq_contrib.onnx - fusion/constant_folding_qdq_node_unit.graph_output.qdq16_contrib.onnx """ + from __future__ import annotations import argparse diff --git a/onnxruntime/test/testdata/transform/cse/generate.py b/onnxruntime/test/testdata/transform/cse/generate.py index ecca4f586f400..01d62422983b5 100644 --- a/onnxruntime/test/testdata/transform/cse/generate.py +++ b/onnxruntime/test/testdata/transform/cse/generate.py @@ -1,7 +1,7 @@ import os import onnx -from onnx import AttributeProto, GraphProto, TensorProto, helper, shape_inference # noqa: F401 +from onnx import TensorProto, helper, shape_inference _this_dir = os.path.abspath(os.path.dirname(__file__)) diff --git a/onnxruntime/test/testdata/transform/expand_elimination.py b/onnxruntime/test/testdata/transform/expand_elimination.py index 86340c9e2553c..226c23fa66389 100644 --- a/onnxruntime/test/testdata/transform/expand_elimination.py +++ b/onnxruntime/test/testdata/transform/expand_elimination.py @@ -1,6 +1,6 @@ import numpy as np import onnx -from onnx import GraphProto, OperatorSetIdProto, TensorProto, helper, numpy_helper # noqa: F401 +from onnx import OperatorSetIdProto, TensorProto, helper, numpy_helper X1 = helper.make_tensor_value_info("input1", TensorProto.FLOAT, [2, 1]) X2 = helper.make_tensor_value_info("input2", TensorProto.FLOAT, ["dynamic", 4]) diff --git a/onnxruntime/test/testdata/transform/fusion/attention_gen.py b/onnxruntime/test/testdata/transform/fusion/attention_gen.py index 
19f46ab9f358a..6ff0ea5ba9983 100644 --- a/onnxruntime/test/testdata/transform/fusion/attention_gen.py +++ b/onnxruntime/test/testdata/transform/fusion/attention_gen.py @@ -1,5 +1,4 @@ import sys -from enum import Enum # noqa: F401 import onnx from onnx import TensorProto, helper diff --git a/onnxruntime/test/testdata/transform/fusion/constant_folding_with_shape_to_initializer.py b/onnxruntime/test/testdata/transform/fusion/constant_folding_with_shape_to_initializer.py index c49ae8b0a422c..65b37a8ed9dab 100644 --- a/onnxruntime/test/testdata/transform/fusion/constant_folding_with_shape_to_initializer.py +++ b/onnxruntime/test/testdata/transform/fusion/constant_folding_with_shape_to_initializer.py @@ -1,6 +1,6 @@ import numpy as np import onnx -from onnx import GraphProto, OperatorSetIdProto, TensorProto, helper, numpy_helper # noqa: F401 +from onnx import OperatorSetIdProto, TensorProto, helper, numpy_helper X = helper.make_tensor_value_info("input", TensorProto.FLOAT, [2, 4, 8]) Y = helper.make_tensor_value_info("output", TensorProto.FLOAT, [2, 4, 16]) diff --git a/onnxruntime/test/testdata/transform/fusion/div_mul.py b/onnxruntime/test/testdata/transform/fusion/div_mul.py index 8cd34a6b53fcf..e7b1f4632afbd 100644 --- a/onnxruntime/test/testdata/transform/fusion/div_mul.py +++ b/onnxruntime/test/testdata/transform/fusion/div_mul.py @@ -1,5 +1,3 @@ -from enum import Enum # noqa: F401 - import onnx from onnx import OperatorSetIdProto, TensorProto, helper diff --git a/onnxruntime/test/testdata/transform/fusion/dynamic_quantize_matmul.py b/onnxruntime/test/testdata/transform/fusion/dynamic_quantize_matmul.py index 3ec3cabbc8b77..e590b46129d7b 100644 --- a/onnxruntime/test/testdata/transform/fusion/dynamic_quantize_matmul.py +++ b/onnxruntime/test/testdata/transform/fusion/dynamic_quantize_matmul.py @@ -1,5 +1,3 @@ -from enum import Enum # noqa: F401 - import onnx from onnx import TensorProto, helper diff --git a/onnxruntime/test/testdata/transform/fusion/embed_layer_norm_gen.py b/onnxruntime/test/testdata/transform/fusion/embed_layer_norm_gen.py index 54fe7b808bf12..f83bedeb8012c 100644 --- a/onnxruntime/test/testdata/transform/fusion/embed_layer_norm_gen.py +++ b/onnxruntime/test/testdata/transform/fusion/embed_layer_norm_gen.py @@ -1,5 +1,3 @@ -from enum import Enum # noqa: F401 - import onnx from onnx import TensorProto, helper from packaging import version diff --git a/onnxruntime/test/testdata/transform/fusion/fast_gelu.py b/onnxruntime/test/testdata/transform/fusion/fast_gelu.py index 20d78b6684609..a16d7e66752bf 100644 --- a/onnxruntime/test/testdata/transform/fusion/fast_gelu.py +++ b/onnxruntime/test/testdata/transform/fusion/fast_gelu.py @@ -1,6 +1,6 @@ import numpy as np import onnx -from onnx import AttributeProto, GraphProto, OperatorSetIdProto, TensorProto, helper, numpy_helper # noqa: F401 +from onnx import OperatorSetIdProto, TensorProto, helper, numpy_helper # Gelu formula: x * 0.5 * (1.0 + tanh(0.7978845608028654 * x * (1.0 + 0.044715 * x * x))) diff --git a/onnxruntime/test/testdata/transform/fusion/fast_gelu2.py b/onnxruntime/test/testdata/transform/fusion/fast_gelu2.py index 718f924ae5902..6922f3ad0a82a 100644 --- a/onnxruntime/test/testdata/transform/fusion/fast_gelu2.py +++ b/onnxruntime/test/testdata/transform/fusion/fast_gelu2.py @@ -1,6 +1,6 @@ import numpy as np import onnx -from onnx import AttributeProto, GraphProto, OperatorSetIdProto, TensorProto, helper, numpy_helper # noqa: F401 +from onnx import OperatorSetIdProto, TensorProto, helper, numpy_helper # Gelu 
formula: x * 0.5 * (1.0 + tanh((sqrt(2 / pi) * (x + 0.044715 * pow(x, 3))))) has_bias = False # change it to True to generate fast_gelu_openai_with_bias.onnx diff --git a/onnxruntime/test/testdata/transform/fusion/fast_gelu3_with_casts.py b/onnxruntime/test/testdata/transform/fusion/fast_gelu3_with_casts.py index d7cfc351b8e97..d91e186296137 100644 --- a/onnxruntime/test/testdata/transform/fusion/fast_gelu3_with_casts.py +++ b/onnxruntime/test/testdata/transform/fusion/fast_gelu3_with_casts.py @@ -1,6 +1,6 @@ import numpy as np import onnx -from onnx import AttributeProto, GraphProto, OperatorSetIdProto, TensorProto, helper, numpy_helper # noqa: F401 +from onnx import OperatorSetIdProto, TensorProto, helper, numpy_helper # Gelu formula: x * 0.5 * (1.0 + tanh((sqrt(2 / pi) * (x + 0.044715 * pow(x, 3))))) diff --git a/onnxruntime/test/testdata/transform/fusion/gelu_gen.py b/onnxruntime/test/testdata/transform/fusion/gelu_gen.py index 428bb0ce00df0..8a4c3ae491215 100644 --- a/onnxruntime/test/testdata/transform/fusion/gelu_gen.py +++ b/onnxruntime/test/testdata/transform/fusion/gelu_gen.py @@ -1,6 +1,6 @@ import numpy as np import onnx -from onnx import AttributeProto, GraphProto, OperatorSetIdProto, TensorProto, helper, numpy_helper # noqa: F401 +from onnx import OperatorSetIdProto, TensorProto, helper, numpy_helper """ Generate test model for Gelu subgraph pattern 2: diff --git a/onnxruntime/test/testdata/transform/fusion/isinf_reducesum.py b/onnxruntime/test/testdata/transform/fusion/isinf_reducesum.py index c6e70fe478701..a9c88618c5c70 100644 --- a/onnxruntime/test/testdata/transform/fusion/isinf_reducesum.py +++ b/onnxruntime/test/testdata/transform/fusion/isinf_reducesum.py @@ -1,5 +1,3 @@ -from enum import Enum # noqa: F401 - import onnx from onnx import OperatorSetIdProto, TensorProto, helper diff --git a/onnxruntime/test/testdata/transform/fusion/layer_norm_t5_gen.py b/onnxruntime/test/testdata/transform/fusion/layer_norm_t5_gen.py index aa4b78f4525de..c0e2bc85f8248 100644 --- a/onnxruntime/test/testdata/transform/fusion/layer_norm_t5_gen.py +++ b/onnxruntime/test/testdata/transform/fusion/layer_norm_t5_gen.py @@ -1,5 +1,3 @@ -from enum import Enum # noqa: F401 - import onnx from onnx import OperatorSetIdProto, TensorProto, helper diff --git a/onnxruntime/test/testdata/transform/fusion/layer_norm_with_cast_2.py b/onnxruntime/test/testdata/transform/fusion/layer_norm_with_cast_2.py index 61b2e2249e7a3..fa83290138d87 100644 --- a/onnxruntime/test/testdata/transform/fusion/layer_norm_with_cast_2.py +++ b/onnxruntime/test/testdata/transform/fusion/layer_norm_with_cast_2.py @@ -1,6 +1,3 @@ -from enum import Enum # noqa: F401 - -import numpy as np # noqa: F401 import onnx from onnx import OperatorSetIdProto, TensorProto, helper diff --git a/onnxruntime/test/testdata/transform/fusion/matmul_integer_to_float.py b/onnxruntime/test/testdata/transform/fusion/matmul_integer_to_float.py index 018e5fb332dd0..f9b154c46fbd1 100644 --- a/onnxruntime/test/testdata/transform/fusion/matmul_integer_to_float.py +++ b/onnxruntime/test/testdata/transform/fusion/matmul_integer_to_float.py @@ -1,5 +1,3 @@ -from enum import Enum # noqa: F401 - import onnx from onnx import TensorProto, helper diff --git a/onnxruntime/test/testdata/transform/fusion/matmul_integer_to_float_large_tensor.py b/onnxruntime/test/testdata/transform/fusion/matmul_integer_to_float_large_tensor.py index 543517cc015ef..6b60a47255c5d 100644 --- a/onnxruntime/test/testdata/transform/fusion/matmul_integer_to_float_large_tensor.py +++ 
b/onnxruntime/test/testdata/transform/fusion/matmul_integer_to_float_large_tensor.py @@ -1,5 +1,3 @@ -from enum import Enum # noqa: F401 - import onnx from onnx import TensorProto, helper diff --git a/onnxruntime/test/testdata/transform/fusion/not_where.py b/onnxruntime/test/testdata/transform/fusion/not_where.py index 82a128153ac70..014d0b8fc531a 100644 --- a/onnxruntime/test/testdata/transform/fusion/not_where.py +++ b/onnxruntime/test/testdata/transform/fusion/not_where.py @@ -1,5 +1,3 @@ -from enum import Enum # noqa: F401 - import onnx from onnx import OperatorSetIdProto, TensorProto, helper diff --git a/onnxruntime/test/testdata/transform/id-elim.py b/onnxruntime/test/testdata/transform/id-elim.py index 1f7b6e2607702..eef8011e7fe23 100644 --- a/onnxruntime/test/testdata/transform/id-elim.py +++ b/onnxruntime/test/testdata/transform/id-elim.py @@ -1,6 +1,5 @@ -import numpy as np # noqa: F401 import onnx -from onnx import GraphProto, OperatorSetIdProto, TensorProto, helper, numpy_helper # noqa: F401 +from onnx import OperatorSetIdProto, TensorProto, helper X1 = helper.make_tensor_value_info("x1", TensorProto.INT64, [4, 4]) X2 = helper.make_tensor_value_info("x2", TensorProto.INT64, [4, 4]) diff --git a/onnxruntime/test/testdata/transform/id-scan9_sum.py b/onnxruntime/test/testdata/transform/id-scan9_sum.py index 7ffd2e21b7333..c813bbfc18d8e 100644 --- a/onnxruntime/test/testdata/transform/id-scan9_sum.py +++ b/onnxruntime/test/testdata/transform/id-scan9_sum.py @@ -1,6 +1,5 @@ -import numpy as np # noqa: F401 import onnx -from onnx import GraphProto, OperatorSetIdProto, TensorProto, helper, numpy_helper # noqa: F401 +from onnx import OperatorSetIdProto, TensorProto, helper initial = helper.make_tensor_value_info("initial", TensorProto.FLOAT, [2]) x = helper.make_tensor_value_info("x", TensorProto.FLOAT, [3, 2]) diff --git a/onnxruntime/test/testdata/transform/model_parallel/bart_mlp_megatron_basic_test.py b/onnxruntime/test/testdata/transform/model_parallel/bart_mlp_megatron_basic_test.py index 503d860baab67..7879bb4d4e0ff 100644 --- a/onnxruntime/test/testdata/transform/model_parallel/bart_mlp_megatron_basic_test.py +++ b/onnxruntime/test/testdata/transform/model_parallel/bart_mlp_megatron_basic_test.py @@ -1,6 +1,6 @@ import numpy as np import onnx -from onnx import AttributeProto, GraphProto, OperatorSetIdProto, TensorProto, helper, numpy_helper # noqa: F401 +from onnx import OperatorSetIdProto, TensorProto, helper, numpy_helper hidden_size = 4 weight_dim_to_split = 16 diff --git a/onnxruntime/test/testdata/transform/model_parallel/bart_self_attention_megatron_basic_test.py b/onnxruntime/test/testdata/transform/model_parallel/bart_self_attention_megatron_basic_test.py index 20bdebead3dac..886cd5c25fb08 100644 --- a/onnxruntime/test/testdata/transform/model_parallel/bart_self_attention_megatron_basic_test.py +++ b/onnxruntime/test/testdata/transform/model_parallel/bart_self_attention_megatron_basic_test.py @@ -1,8 +1,6 @@ -import random # noqa: F401 - import numpy as np import onnx -from onnx import GraphProto, OperatorSetIdProto, TensorProto, helper, numpy_helper # noqa: F401 +from onnx import OperatorSetIdProto, TensorProto, helper, numpy_helper batch = 6 hidden_size = 4 diff --git a/onnxruntime/test/testdata/transform/model_parallel/mlp_megatron_basic_test.py b/onnxruntime/test/testdata/transform/model_parallel/mlp_megatron_basic_test.py index 07487ee4880ed..5dec4899d59af 100644 --- a/onnxruntime/test/testdata/transform/model_parallel/mlp_megatron_basic_test.py +++ 
b/onnxruntime/test/testdata/transform/model_parallel/mlp_megatron_basic_test.py @@ -1,6 +1,6 @@ import numpy as np import onnx -from onnx import AttributeProto, GraphProto, OperatorSetIdProto, TensorProto, helper, numpy_helper # noqa: F401 +from onnx import OperatorSetIdProto, TensorProto, helper, numpy_helper hidden_size = 4 weight_dim_to_split = 16 diff --git a/onnxruntime/test/testdata/transform/model_parallel/self_attention_megatron_basic_test.py b/onnxruntime/test/testdata/transform/model_parallel/self_attention_megatron_basic_test.py index 306ad7d37403a..3749da038d93e 100644 --- a/onnxruntime/test/testdata/transform/model_parallel/self_attention_megatron_basic_test.py +++ b/onnxruntime/test/testdata/transform/model_parallel/self_attention_megatron_basic_test.py @@ -1,6 +1,6 @@ import numpy as np import onnx -from onnx import GraphProto, OperatorSetIdProto, TensorProto, helper, numpy_helper # noqa: F401 +from onnx import OperatorSetIdProto, TensorProto, helper, numpy_helper hidden_size = 4 attention_head = 2 diff --git a/onnxruntime/test/testdata/transform/recompute/recompute_test_graph_generator.py b/onnxruntime/test/testdata/transform/recompute/recompute_test_graph_generator.py index 2c734feda7fac..b7552d9a26ca4 100644 --- a/onnxruntime/test/testdata/transform/recompute/recompute_test_graph_generator.py +++ b/onnxruntime/test/testdata/transform/recompute/recompute_test_graph_generator.py @@ -2,11 +2,11 @@ # Licensed under the MIT License. """This file is used to generate test data for MemoryOptimizer tests in - onnxruntime/test/optimizer/memory_optimizer_test.cc. +onnxruntime/test/optimizer/memory_optimizer_test.cc. - Be noticed, after run this script, manually rename recompute_XXXX_execution_model_training.onnx to - recompute_XXXX.onnx - """ +Note: after running this script, manually rename recompute_XXXX_execution_model_training.onnx to +recompute_XXXX.onnx +""" import torch diff --git a/onnxruntime/test/wasm/karma.conf.js b/onnxruntime/test/wasm/karma.conf.js index d6830501d2290..412e449622b5b 100644 --- a/onnxruntime/test/wasm/karma.conf.js +++ b/onnxruntime/test/wasm/karma.conf.js @@ -38,6 +38,10 @@ const gtestReporter = {'reporter:gtest': ['type', function() { }; }]}; +// In Node.js v16 and below, 'localhost' resolves to IPv4, so we need to listen on '0.0.0.0'. +// In Node.js v17+, 'localhost' resolves to IPv6, so we need to listen on '::'. +const listenAddress = Number.parseInt(process.versions.node.split('.')[0]) >= 17 ? '::' : '0.0.0.0'; + module.exports = function(config) { config.set({ basePath: '.', @@ -60,6 +64,7 @@ module.exports = function(config) { browserDisconnectTimeout: 600000, // allow running tests for 30 minutes browserNoActivityTimeout: 30 * 60 * 1000, + listenAddress, customLaunchers: { ChromeTest: { base: 'ChromeCanary', diff --git a/orttraining/orttraining/core/graph/gradient_builder.cc b/orttraining/orttraining/core/graph/gradient_builder.cc index 43835f07c4b40..1ba52ca9e51c4 100755 --- a/orttraining/orttraining/core/graph/gradient_builder.cc +++ b/orttraining/orttraining/core/graph/gradient_builder.cc @@ -2239,5 +2239,23 @@ IMPLEMENT_GRADIENT_BUILDER(GetAtanGradient) { return result; } +IMPLEMENT_GRADIENT_BUILDER(GetGlobalMaxPoolGradient) { + // For GlobalMaxPool's gradient, a binary mask flags max elements. + // We multiply that mask by the incoming gradient, passing gradients only to maxima.
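+ // Example: for a single-channel input X = [[1, 3], [3, 2]], GlobalMaxPool yields Y = [[3]], so + // the mask is [[0, 1], [1, 0]] and, with incoming gradient dY = [[g]], dX = [[0, g], [g, 0]]. + // Tied maxima each receive the full incoming gradient under this Equal-based mask.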
+ std::vector<NodeDef> result; + result.push_back(NodeDef("Shape", {I(0)}, {IA("X_shape")})); + result.push_back(NodeDef("Expand", {O(0), IA("X_shape")}, {IA("expanded_Y")})); + result.push_back(NodeDef("Equal", {I(0), IA("expanded_Y")}, {IA("mask")})); + result.push_back(NodeDef("Cast", + {IA("mask")}, + {IA("mask_cast")}, + {MakeAttribute("to", static_cast<int64_t>(IElemType(0)))})); + + result.push_back(NodeDef("Expand", {GO(0), IA("X_shape")}, {IA("expanded_dY")})); + result.push_back(NodeDef("Mul", {IA("mask_cast"), IA("expanded_dY")}, {GI(0)})); + + return result; +} + } // namespace training } // namespace onnxruntime diff --git a/orttraining/orttraining/core/graph/gradient_builder.h b/orttraining/orttraining/core/graph/gradient_builder.h index 2b40754b6261f..2611e742f342a 100755 --- a/orttraining/orttraining/core/graph/gradient_builder.h +++ b/orttraining/orttraining/core/graph/gradient_builder.h @@ -94,6 +94,7 @@ DECLARE_GRADIENT_BUILDER(GetLeakyReluGradient) DECLARE_GRADIENT_BUILDER(GetConvTransposeGradient) DECLARE_GRADIENT_BUILDER(GetResizeGradient) DECLARE_GRADIENT_BUILDER(GetAtanGradient) +DECLARE_GRADIENT_BUILDER(GetGlobalMaxPoolGradient) DECLARE_GRADIENT_BUILDER(GetExternalGradient) diff --git a/orttraining/orttraining/core/graph/gradient_builder_registry.cc b/orttraining/orttraining/core/graph/gradient_builder_registry.cc index 9c9884c5d3865..a04d909267142 100755 --- a/orttraining/orttraining/core/graph/gradient_builder_registry.cc +++ b/orttraining/orttraining/core/graph/gradient_builder_registry.cc @@ -126,6 +126,7 @@ void GradientBuilderRegistry::RegisterGradientBuilders() { REGISTER_GRADIENT_BUILDER("ConvTranspose", GetConvTransposeGradient); REGISTER_GRADIENT_BUILDER("Resize", GetResizeGradient); REGISTER_GRADIENT_BUILDER("Atan", GetAtanGradient); + REGISTER_GRADIENT_BUILDER("GlobalMaxPool", GetGlobalMaxPoolGradient); REGISTER_GRADIENT_BUILDER("ExternalGradient", GetExternalGradient); }; diff --git a/orttraining/orttraining/python/training/__init__.py b/orttraining/orttraining/python/training/__init__.py index 1da95dff94f9f..d77c571f22f52 100644 --- a/orttraining/orttraining/python/training/__init__.py +++ b/orttraining/orttraining/python/training/__init__.py @@ -15,9 +15,9 @@ __all__ = [ "PropagateCastOpsStrategy", "TrainingParameters", - "is_ortmodule_available", "amp", "artifacts", + "is_ortmodule_available", "optim", ] diff --git a/orttraining/orttraining/python/training/_utils.py b/orttraining/orttraining/python/training/_utils.py index 091274d1d171d..a592c9f4b8f28 100644 --- a/orttraining/orttraining/python/training/_utils.py +++ b/orttraining/orttraining/python/training/_utils.py @@ -175,8 +175,8 @@ def static_vars(**kwargs): """ def decorate(func): - for k in kwargs: - setattr(func, k, kwargs[k]) + for k, v in kwargs.items(): + setattr(func, k, v) return func return decorate diff --git a/orttraining/orttraining/python/training/artifacts.py b/orttraining/orttraining/python/training/artifacts.py index 31591c0156b14..c304d2f262650 100644 --- a/orttraining/orttraining/python/training/artifacts.py +++ b/orttraining/orttraining/python/training/artifacts.py @@ -6,7 +6,6 @@ import os import pathlib from enum import Enum -from typing import List, Optional, Union import onnx @@ -40,18 +39,18 @@ class OptimType(Enum): def generate_artifacts( - model: Union[onnx.ModelProto, str], - requires_grad: Optional[List[str]] = None, - frozen_params: Optional[List[str]] = None, - loss: Optional[Union[LossType, onnxblock.Block]] = None, - optimizer: Optional[Union[OptimType, onnxblock.Block]] = None,
- artifact_directory: Optional[Union[str, bytes, os.PathLike]] = None, + model: onnx.ModelProto | str, + requires_grad: list[str] | None = None, + frozen_params: list[str] | None = None, + loss: LossType | onnxblock.Block | None = None, + optimizer: OptimType | onnxblock.Block | None = None, + artifact_directory: str | bytes | os.PathLike | None = None, prefix: str = "", ort_format: bool = False, - custom_op_library: Optional[Union[str, bytes, os.PathLike]] = None, - additional_output_names: Optional[List[str]] = None, + custom_op_library: str | bytes | os.PathLike | None = None, + additional_output_names: list[str] | None = None, nominal_checkpoint: bool = False, - loss_input_names: Optional[List[str]] = None, + loss_input_names: list[str] | None = None, ) -> None: """Generates artifacts required for training with ORT training api. diff --git a/orttraining/orttraining/python/training/experimental/gradient_graph/_gradient_graph_tools.py b/orttraining/orttraining/python/training/experimental/gradient_graph/_gradient_graph_tools.py index 5ab79b3712472..9ea12753a254b 100644 --- a/orttraining/orttraining/python/training/experimental/gradient_graph/_gradient_graph_tools.py +++ b/orttraining/orttraining/python/training/experimental/gradient_graph/_gradient_graph_tools.py @@ -1,6 +1,7 @@ import io +from collections.abc import Callable from pathlib import Path -from typing import Any, Callable, Optional, Union # noqa: F401 +from typing import Any import torch from torch.onnx import TrainingMode @@ -15,7 +16,7 @@ def export_gradient_graph( loss_fn: Callable[[Any, Any], Any], example_input: torch.Tensor, example_labels: torch.Tensor, - gradient_graph_path: Union[Path, str], + gradient_graph_path: Path | str, opset_version=12, ) -> None: r""" @@ -45,7 +46,7 @@ def export_gradient_graph( class WrapperModule(torch.nn.Module): def forward(self, model_input, expected_labels, *model_params): - for param, set_param in zip(model.parameters(), model_params): + for param, set_param in zip(model.parameters(), model_params, strict=False): param.data = set_param.data output = model(model_input) loss = loss_fn(output, expected_labels) diff --git a/orttraining/orttraining/python/training/onnxblock/__init__.py b/orttraining/orttraining/python/training/onnxblock/__init__.py index 3a1ca772453f5..7497bd5fff63a 100644 --- a/orttraining/orttraining/python/training/onnxblock/__init__.py +++ b/orttraining/orttraining/python/training/onnxblock/__init__.py @@ -12,15 +12,15 @@ from onnxruntime.training.onnxblock.onnxblock import ForwardBlock, TrainingBlock __all__ = [ - "blocks", - "loss", - "optim", "Block", "ForwardBlock", "TrainingBlock", - "load_checkpoint_to_model", - "save_checkpoint", "base", + "blocks", "custom_op_library", "empty_base", + "load_checkpoint_to_model", + "loss", + "optim", + "save_checkpoint", ] diff --git a/orttraining/orttraining/python/training/onnxblock/_graph_utils.py b/orttraining/orttraining/python/training/onnxblock/_graph_utils.py index 42743a4200d17..fd10e6b65fb84 100644 --- a/orttraining/orttraining/python/training/onnxblock/_graph_utils.py +++ b/orttraining/orttraining/python/training/onnxblock/_graph_utils.py @@ -1,7 +1,6 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. 
-from typing import List, Union import onnx @@ -43,7 +42,7 @@ def generate_graph_name(token: str) -> str: return f"onnx::{token}::{_get_token()}" -def register_graph_outputs(model: onnx.ModelProto, output_names: Union[List[str], str]) -> None: +def register_graph_outputs(model: onnx.ModelProto, output_names: list[str] | str) -> None: """Register the given output names as graph outputs. The graph outputs shape information is extracted from the graph value_infos and diff --git a/orttraining/orttraining/python/training/onnxblock/_training_graph_utils.py b/orttraining/orttraining/python/training/onnxblock/_training_graph_utils.py index 1213342004d48..fbdbac3504b65 100644 --- a/orttraining/orttraining/python/training/onnxblock/_training_graph_utils.py +++ b/orttraining/orttraining/python/training/onnxblock/_training_graph_utils.py @@ -4,7 +4,6 @@ import copy import logging import os -from typing import List, Optional, Set, Tuple, Union import onnx @@ -35,7 +34,7 @@ def disable_training_mode_batchnorm(node): ops_to_disable_training_mode_func_map[node.op_type](node) -def _reorder_outputs(model: onnx.ModelProto, user_output_names: List[str], requires_grad: Set[str]) -> None: +def _reorder_outputs(model: onnx.ModelProto, user_output_names: list[str], requires_grad: set[str]) -> None: """Reorders the outputs of the model to match the order of [user_outputs, gradients]""" graph_outputs = {output.name: output for output in model.graph.output} @@ -50,7 +49,7 @@ def _reorder_outputs(model: onnx.ModelProto, user_output_names: List[str], requi model.graph.output.extend(ordered_graph_outputs) -def _move_initializers_to_inputs(model: onnx.ModelProto, initializer_names: Optional[Set[str]] = None) -> None: +def _move_initializers_to_inputs(model: onnx.ModelProto, initializer_names: set[str] | None = None) -> None: # Move all trainable and non trainable initializers to graph inputs. # This allows training to pass in the parameters from outside the graph # so as to share the parameters across multiple sessions. @@ -70,9 +69,9 @@ def _move_initializers_to_inputs(model: onnx.ModelProto, initializer_names: Opti def _gradient_model_for( model: onnx.ModelProto, - requires_grad: Set[str], + requires_grad: set[str], loss_name: str, - options: Optional[SessionOptions] = None, + options: SessionOptions | None = None, ) -> onnx.ModelProto: """Builds the gradient graph on top of the given input forward only graph.""" @@ -87,11 +86,11 @@ def _gradient_model_for( def build_gradient_graph( model: onnx.ModelProto, - requires_grad: Set[str], - frozen_params: Set[str], - output_names: Union[List[str], str], - custom_op_library: Optional[str] = None, -) -> Tuple[onnx.ModelProto, onnx.ModelProto]: + requires_grad: set[str], + frozen_params: set[str], + output_names: list[str] | str, + custom_op_library: str | None = None, +) -> tuple[onnx.ModelProto, onnx.ModelProto]: """Prepare the training model and the eval model. This function will restructure the model to prepare for training. @@ -134,7 +133,7 @@ def build_gradient_graph( return gradient_model, eval_model -def build_gradient_accumulation_graph(grad_model: onnx.ModelProto, requires_grad: Set[str]) -> None: +def build_gradient_accumulation_graph(grad_model: onnx.ModelProto, requires_grad: set[str]) -> None: """Builds gradient accumulation nodes on top of a training model. 
Adds an InPlaceAccumulatorV2 node for every gradient so that the gradients @@ -209,8 +208,8 @@ def build_gradient_accumulation_graph(grad_model: onnx.ModelProto, requires_grad def get_model_parameters( - model: onnx.ModelProto, requires_grad: Set[str], frozen_params: Set[str] -) -> Tuple[List[onnx.TensorProto], List[onnx.TensorProto]]: + model: onnx.ModelProto, requires_grad: set[str], frozen_params: set[str] +) -> tuple[list[onnx.TensorProto], list[onnx.TensorProto]]: """Returns trainable and non trainable onnx model parameters. This function pulls out the model parameters from the initializers in the graph. diff --git a/orttraining/orttraining/python/training/onnxblock/blocks.py b/orttraining/orttraining/python/training/onnxblock/blocks.py index c13843f816f16..24dc263eeb09b 100644 --- a/orttraining/orttraining/python/training/onnxblock/blocks.py +++ b/orttraining/orttraining/python/training/onnxblock/blocks.py @@ -6,7 +6,7 @@ import logging import os from abc import ABC, abstractmethod -from typing import Any, List, Optional +from typing import Any import numpy as np import onnx @@ -402,7 +402,7 @@ def __init__(self, like: str): self._like = like - def build(self, input_name: Optional[str] = None): + def build(self, input_name: str | None = None): cloned_input = None with contextlib.suppress(LookupError): # Suppress LookupError because we want to try to get the input from the output if it's not found in the inputs @@ -428,12 +428,12 @@ def __init__( default_float: float = 0.0, default_int64: int = -1, default_string: str = "_Unused", - keys_floats: Optional[List[float]] = None, - keys_int64s: Optional[List[int]] = None, - keys_strings: Optional[List[str]] = None, - values_floats: Optional[List[float]] = None, - values_int64s: Optional[List[int]] = None, - values_strings: Optional[List[str]] = None, + keys_floats: list[float] | None = None, + keys_int64s: list[int] | None = None, + keys_strings: list[str] | None = None, + values_floats: list[float] | None = None, + values_int64s: list[int] | None = None, + values_strings: list[str] | None = None, ): super().__init__() @@ -443,8 +443,8 @@ def __init__( "default_string": default_string, } - def _add_attributes(names: List[str], values: List[Any]): - for name, value in zip(names, values): + def _add_attributes(names: list[str], values: list[Any]): + for name, value in zip(names, values, strict=False): if value is not None: self._attributes[name] = value diff --git a/orttraining/orttraining/python/training/onnxblock/checkpoint_utils.py b/orttraining/orttraining/python/training/onnxblock/checkpoint_utils.py index de3453c630f9c..74292ea10a522 100644 --- a/orttraining/orttraining/python/training/onnxblock/checkpoint_utils.py +++ b/orttraining/orttraining/python/training/onnxblock/checkpoint_utils.py @@ -2,7 +2,6 @@ # Licensed under the MIT License. import os -from typing import List, Tuple, Union import onnx @@ -11,8 +10,8 @@ def save_checkpoint( - parameters: Tuple[List[onnx.TensorProto], List[onnx.TensorProto]], - path_to_checkpoint: Union[str, os.PathLike], + parameters: tuple[list[onnx.TensorProto], list[onnx.TensorProto]], + path_to_checkpoint: str | os.PathLike, nominal_checkpoint: bool = False, ) -> None: """Saves the parameters to the checkpoint directory path_to_checkpoint. 
@@ -32,7 +31,7 @@ def save_checkpoint( _save_checkpoint(trainable_params, non_trainable_params, os.fspath(path_to_checkpoint), nominal_checkpoint) -def load_checkpoint_to_model(path_to_checkpoint: Union[str, os.PathLike], model: onnx.ModelProto) -> None: +def load_checkpoint_to_model(path_to_checkpoint: str | os.PathLike, model: onnx.ModelProto) -> None: """Loads the checkpoint to an onnx inference model. Args: diff --git a/orttraining/orttraining/python/training/onnxblock/loss/loss.py b/orttraining/orttraining/python/training/onnxblock/loss/loss.py index 09429dd844187..e0624c6722519 100644 --- a/orttraining/orttraining/python/training/onnxblock/loss/loss.py +++ b/orttraining/orttraining/python/training/onnxblock/loss/loss.py @@ -2,7 +2,6 @@ # Licensed under the MIT License. import copy -from typing import Optional import onnx @@ -62,7 +61,7 @@ class CrossEntropyLoss(blocks.Block): contribute to the input gradient. """ - def __init__(self, weight=None, reduction: str = "mean", ignore_index: Optional[int] = None): + def __init__(self, weight=None, reduction: str = "mean", ignore_index: int | None = None): super().__init__() if reduction not in ["mean", "sum", "none"]: @@ -230,7 +229,7 @@ def __init__(self, reduction: str = "mean"): self._abs = blocks.Abs() self._sub = blocks.Sub() - def build(self, loss_input_name: str, target_name: Optional[str] = "target"): + def build(self, loss_input_name: str, target_name: str | None = "target"): """Adds an L1 loss subgraph on top of the base_model. Args: diff --git a/orttraining/orttraining/python/training/onnxblock/onnxblock.py b/orttraining/orttraining/python/training/onnxblock/onnxblock.py index 64f7acf4dc02c..0cb42cce9e5d5 100644 --- a/orttraining/orttraining/python/training/onnxblock/onnxblock.py +++ b/orttraining/orttraining/python/training/onnxblock/onnxblock.py @@ -3,7 +3,6 @@ import logging from abc import abstractmethod -from typing import List, Tuple import onnx @@ -139,7 +138,7 @@ def requires_grad(self, argument_name: str, value: bool = True): self._requires_grad.remove(argument_name) self._frozen_params.add(argument_name) - def parameters(self) -> Tuple[List[onnx.TensorProto], List[onnx.TensorProto]]: + def parameters(self) -> tuple[list[onnx.TensorProto], list[onnx.TensorProto]]: """Trainable as well as non-trainable (frozen) parameters of the model. Model parameters that are extracted while building the training model @@ -161,7 +160,7 @@ def parameters(self) -> Tuple[List[onnx.TensorProto], List[onnx.TensorProto]]: return self._parameters - def to_model_proto(self) -> Tuple[onnx.ModelProto, onnx.ModelProto]: + def to_model_proto(self) -> tuple[onnx.ModelProto, onnx.ModelProto]: """Returns the training and eval models. 
Once the gradient graph is built, the training and eval models can be retrieved diff --git a/orttraining/orttraining/python/training/onnxblock/optim/__init__.py b/orttraining/orttraining/python/training/onnxblock/optim/__init__.py index 4384ecf0546cc..6fc4e260c6575 100644 --- a/orttraining/orttraining/python/training/onnxblock/optim/__init__.py +++ b/orttraining/orttraining/python/training/onnxblock/optim/__init__.py @@ -3,4 +3,4 @@ from onnxruntime.training.onnxblock.optim.optim import SGD, AdamW, ClipGradNorm -__all__ = ["AdamW", "ClipGradNorm", "SGD"] +__all__ = ["SGD", "AdamW", "ClipGradNorm"] diff --git a/orttraining/orttraining/python/training/onnxblock/optim/optim.py b/orttraining/orttraining/python/training/onnxblock/optim/optim.py index d14b2efefe916..a18fe7e6414e2 100644 --- a/orttraining/orttraining/python/training/onnxblock/optim/optim.py +++ b/orttraining/orttraining/python/training/onnxblock/optim/optim.py @@ -1,7 +1,6 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. -from typing import Dict, List, Optional, Tuple import onnx @@ -66,10 +65,10 @@ def __init__(self): def _build_optimizer_node( self, - input_names: List[str], + input_names: list[str], output_name: str, node_name: str, - node_attributes: Dict, + node_attributes: dict, ) -> str: """ Build and append an optimizer node to the ONNX graph. @@ -135,10 +134,10 @@ def build( class AdamWOptimizer(_OptimizerBase): def __init__( self, - bias_correction: Optional[bool] = True, - betas: Tuple[float, float] = (0.9, 0.999), - eps: Optional[float] = 1e-6, - weight_decay: Optional[float] = 0.0, + bias_correction: bool | None = True, + betas: tuple[float, float] = (0.9, 0.999), + eps: float | None = 1e-6, + weight_decay: float | None = 0.0, ): super().__init__() @@ -242,7 +241,7 @@ def _optimizer_specific_logic( learning_rate_name: str, params_name: str, gradients_name: str, - trainable_parameters: Tuple[List[onnx.TensorProto], List[onnx.TensorProto]], + trainable_parameters: tuple[list[onnx.TensorProto], list[onnx.TensorProto]], ) -> str: raise NotImplementedError("Subclasses must implement _optimizer_specific_logic method.") @@ -264,7 +263,7 @@ def _optimizer_specific_logic( learning_rate_name: str, params_name: str, gradients_name: str, - trainable_parameters: Tuple[List[onnx.TensorProto], List[onnx.TensorProto]], + trainable_parameters: tuple[list[onnx.TensorProto], list[onnx.TensorProto]], ) -> str: onnx_model = self.base step_name = "step" @@ -307,7 +306,7 @@ def _optimizer_specific_logic( learning_rate_name: str, params_name: str, gradients_name: str, - trainable_parameters: Tuple[List[onnx.TensorProto], List[onnx.TensorProto]], + trainable_parameters: tuple[list[onnx.TensorProto], list[onnx.TensorProto]], ) -> str: onnx_model = self.base updated_flag_name = self._sgd(learning_rate_name, params_name, gradients_name) diff --git a/orttraining/orttraining/python/training/optim/__init__.py b/orttraining/orttraining/python/training/optim/__init__.py index 3cace4d30c77d..2ce3a32b0b38b 100644 --- a/orttraining/orttraining/python/training/optim/__init__.py +++ b/orttraining/orttraining/python/training/optim/__init__.py @@ -1,8 +1,10 @@ from .config import AdamConfig, LambConfig, SGDConfig, _OptimizerConfig # noqa: F401 from .fp16_optimizer import FP16_Optimizer # noqa: F401 from .fused_adam import AdamWMode, FusedAdam # noqa: F401 -from .lr_scheduler import ConstantWarmupLRScheduler # noqa: F401 -from .lr_scheduler import CosineWarmupLRScheduler # noqa: F401 -from .lr_scheduler import 
LinearWarmupLRScheduler # noqa: F401 -from .lr_scheduler import PolyWarmupLRScheduler # noqa: F401 -from .lr_scheduler import _LRScheduler # noqa: F401 +from .lr_scheduler import ( + ConstantWarmupLRScheduler, # noqa: F401 + CosineWarmupLRScheduler, # noqa: F401 + LinearWarmupLRScheduler, # noqa: F401 + PolyWarmupLRScheduler, # noqa: F401 + _LRScheduler, # noqa: F401 +) diff --git a/orttraining/orttraining/python/training/optim/_ds_code_store.py b/orttraining/orttraining/python/training/optim/_ds_code_store.py index dc1e20bc3dcff..7e8ba3c213118 100644 --- a/orttraining/orttraining/python/training/optim/_ds_code_store.py +++ b/orttraining/orttraining/python/training/optim/_ds_code_store.py @@ -15,11 +15,7 @@ class Stage1And2_DeepSpeedZeroOptimizer_0_9_2: def has_overflow_serial(self, params, is_grad_list=False): - for p in params: - if p.grad is not None and self._has_inf_or_nan(p.grad.data): - return True - - return False + return any(p.grad is not None and self._has_inf_or_nan(p.grad.data) for p in params) def get_grad_norm_direct(self, gradients, params, norm_type=2): @@ -52,7 +48,7 @@ def get_grad_norm_direct(self, gradients, params, norm_type=2): total_norm = 0.0 # if dist.get_rank() == 0: # logger.info(f"Total Norm beginning {total_norm}") - for g, p in zip(gradients, params): + for g, p in zip(gradients, params, strict=False): # Pipeline parallelism may replicate parameters. Avoid multi-counting. if hasattr(p, PIPE_REPLICATED) and p.ds_pipe_replicated: continue diff --git a/orttraining/orttraining/python/training/optim/_ds_modifier.py b/orttraining/orttraining/python/training/optim/_ds_modifier.py index 55e2e08432137..9d8f178c1c65c 100644 --- a/orttraining/orttraining/python/training/optim/_ds_modifier.py +++ b/orttraining/orttraining/python/training/optim/_ds_modifier.py @@ -178,7 +178,7 @@ def is_model_parallel_parameter(p): #### THIS IS THE FASTER IMPLEMENTATION #### grads_for_norm = [] - for g, p in zip(gradients, params): + for g, p in zip(gradients, params, strict=False): if is_model_parallel_parameter(p) or (target.model_parallel_rank == 0): # BE NOTED: deepspeed original give a double type conversion here, not sure whether this is impacting some models. 
# https://github.com/microsoft/DeepSpeed/blob/9e5c0c5c3ecabb68b7e9dffac0e9b8d167e3cab8/deepspeed/runtime/zero/stage2.py#L1501 diff --git a/orttraining/orttraining/python/training/optim/_megatron_modifier.py b/orttraining/orttraining/python/training/optim/_megatron_modifier.py index 702eba77cb74a..e10fa40ac9770 100644 --- a/orttraining/orttraining/python/training/optim/_megatron_modifier.py +++ b/orttraining/orttraining/python/training/optim/_megatron_modifier.py @@ -18,8 +18,8 @@ class LegacyMegatronLMModifier(FP16OptimizerModifier): def __init__(self, optimizer, **kwargs) -> None: super().__init__(optimizer) - self.get_horizontal_model_parallel_rank = kwargs.get("get_horizontal_model_parallel_rank", None) - self.get_horizontal_model_parallel_group = kwargs.get("get_horizontal_model_parallel_group", None) + self.get_horizontal_model_parallel_rank = kwargs.get("get_horizontal_model_parallel_rank") + self.get_horizontal_model_parallel_group = kwargs.get("get_horizontal_model_parallel_group") def can_be_modified(self): return self.check_requirements( diff --git a/orttraining/orttraining/python/training/optim/config.py b/orttraining/orttraining/python/training/optim/config.py index d63c7ab40a787..d509c8b06fae0 100644 --- a/orttraining/orttraining/python/training/optim/config.py +++ b/orttraining/orttraining/python/training/optim/config.py @@ -57,9 +57,9 @@ def __init__(self, name, params, defaults): ) for k in group: if k != "params": - assert ( - k in defaults or k.replace("_coef", "") in defaults - ), f"'params' has {k} hyper parameter not present at 'defaults'" + assert k in defaults or k.replace("_coef", "") in defaults, ( + f"'params' has {k} hyper parameter not present at 'defaults'" + ) self.name = name self.lr = float(defaults["lr"]) diff --git a/orttraining/orttraining/python/training/optim/lr_scheduler.py b/orttraining/orttraining/python/training/optim/lr_scheduler.py index 2a9bf438fa172..bef6abb4a2cdc 100644 --- a/orttraining/orttraining/python/training/optim/lr_scheduler.py +++ b/orttraining/orttraining/python/training/optim/lr_scheduler.py @@ -273,9 +273,9 @@ def __init__(self, total_steps, lr_end=1e-7, power=1.0, warmup=0.002): self._num_warmup_steps = warmup * total_steps def _warmup_poly(self, train_step_info): - assert ( - train_step_info.optimizer_config.lr > self.lr_end - ), f"lr_end ({lr_end}) must be be smaller than initial lr ({train_step_info.optimizer_config.lr})" # noqa: F821 + assert train_step_info.optimizer_config.lr > self.lr_end, ( + f"lr_end ({self.lr_end}) must be smaller than initial lr ({train_step_info.optimizer_config.lr})" + ) if train_step_info.optimization_step < self._num_warmup_steps: return float(train_step_info.optimization_step) / float(max(1, self._num_warmup_steps)) diff --git a/orttraining/orttraining/python/training/ort_triton/__init__.py b/orttraining/orttraining/python/training/ort_triton/__init__.py index 5f2d0c62ffa50..f87f8d73e7332 100644 --- a/orttraining/orttraining/python/training/ort_triton/__init__.py +++ b/orttraining/orttraining/python/training/ort_triton/__init__.py @@ -9,8 +9,12 @@ from onnxruntime.capi import _pybind_state as _C from .kernel import * # noqa: F403 -from .triton_op_executor import register_triton_kernel # noqa: F401 -from .triton_op_executor import call_triton_by_name, call_triton_by_onnx, get_config +from .triton_op_executor import ( + call_triton_by_name, + call_triton_by_onnx, + get_config, + register_triton_kernel, # noqa: F401 +) def run_once_register_triton_op_executor(f): diff --git
a/orttraining/orttraining/python/training/ort_triton/_cache.py b/orttraining/orttraining/python/training/ort_triton/_cache.py index b70064377abfc..a79fca0be3fe9 100644 --- a/orttraining/orttraining/python/training/ort_triton/_cache.py +++ b/orttraining/orttraining/python/training/ort_triton/_cache.py @@ -12,7 +12,6 @@ import sys import tempfile from types import ModuleType -from typing import Tuple @functools.lru_cache(None) @@ -49,7 +48,7 @@ def _write(source_code, ext, extra=""): class PyCodeCache: - cache = dict() # noqa: RUF012 + cache = {} # noqa: RUF012 clear = staticmethod(cache.clear) @classmethod @@ -69,11 +68,11 @@ def load(cls, source_code) -> ModuleType: class ModuleCache: - cache = dict() # noqa: RUF012 + cache = {} # noqa: RUF012 clear = staticmethod(cache.clear) @classmethod - def load(cls, key_func, mod_func, *args) -> Tuple[str, ModuleType]: + def load(cls, key_func, mod_func, *args) -> tuple[str, ModuleType]: key = key_func(*args) if key not in cls.cache: func_name, mod = mod_func(*args) diff --git a/orttraining/orttraining/python/training/ort_triton/_codegen.py b/orttraining/orttraining/python/training/ort_triton/_codegen.py index 9a447d8019aff..548b415ea990e 100644 --- a/orttraining/orttraining/python/training/ort_triton/_codegen.py +++ b/orttraining/orttraining/python/training/ort_triton/_codegen.py @@ -12,8 +12,6 @@ """ -from typing import Tuple - import sympy import torch from sympy.codegen.rewriting import create_expand_pow_optimization @@ -49,7 +47,7 @@ def codegen(self, node: IRNode, context: CodegenContext, code_buffer: CodeBuffer assert func is not None, f"unimplemented node: {node.__class__.__name__}" func(node, context, code_buffer, indent) - def _get_elementwise_offset_mask(self, offset_calc: OffsetCalculator, arg_name: str) -> Tuple[str, str]: + def _get_elementwise_offset_mask(self, offset_calc: OffsetCalculator, arg_name: str) -> tuple[str, str]: if offset_calc.is_x_reduced(arg_name): # Scalar. return "tl.full([1], 0, tl.int32)", "" @@ -61,7 +59,7 @@ def _get_elementwise_offset_mask(self, offset_calc: OffsetCalculator, arg_name: offset_str = str(expand_opt(sympy_dot(parse_shape(idx_var), strides))) return offset_str, "xmask" if offset_calc.requires_x_mask else "" - def _get_reduce_offset_mask(self, offset_calc: OffsetCalculator, arg_name: str) -> Tuple[str, str]: + def _get_reduce_offset_mask(self, offset_calc: OffsetCalculator, arg_name: str) -> tuple[str, str]: offset_strs = [] mask_strs = [] if not offset_calc.is_x_reduced(arg_name): @@ -93,7 +91,7 @@ def _get_reduce_offset_mask(self, offset_calc: OffsetCalculator, arg_name: str) offset_strs.append("tl.full([1, 1], 0, tl.int32)") return " + ".join(offset_strs), " & ".join(mask_strs) - def _get_offset_mask(self, offset_calc: OffsetCalculator, arg_name: str) -> Tuple[str, str]: + def _get_offset_mask(self, offset_calc: OffsetCalculator, arg_name: str) -> tuple[str, str]: return ( self._get_reduce_offset_mask(offset_calc, arg_name) if offset_calc.is_reduction @@ -105,9 +103,9 @@ def IONode(self, node: IONode, context: CodegenContext, code_buffer: CodeBuffer, name = node.tensor_arg.name var_name = context.get_variable_name(name) internal_var_name = context.get_internal_variable_name(name) - assert ( - var_name != internal_var_name - ), f"variable name {var_name} and its internal variable name should not be the same." + assert var_name != internal_var_name, ( + f"variable name {var_name} and its internal variable name should not be the same." 
+ ) offset_str, mask_str = self._get_offset_mask(node.offset_calc, node.tensor_arg.name) if offset_str: @@ -359,8 +357,7 @@ def ReduceForLoopStart( # noqa: N802 for reduce_node in node.reduce_nodes: tmp_var_name = "tmp_" + context.get_internal_variable_name(reduce_node.outputs[0].name) code_buffer += ( - f"{space_indent}{tmp_var_name} = " - f"tl.zeros([XBLOCK, RBLOCK], tl.float32) + {reduce_node.default_value}\n" + f"{space_indent}{tmp_var_name} = tl.zeros([XBLOCK, RBLOCK], tl.float32) + {reduce_node.default_value}\n" ) code_buffer += ( f"{space_indent}for roffset in range(0, rnumel, RBLOCK):\n{space_indent} rindex = rbase + roffset\n" @@ -440,9 +437,7 @@ def DropoutNode( # noqa: N802 def ModuleNode(self, node: ModuleNode, context: CodegenContext, code_buffer: CodeBuffer, indent: int): # noqa: N802 space_indent = " " * indent code_buffer += ( - f"{space_indent}import triton\n" - f"{space_indent}import triton.language as tl\n" - f"{space_indent}import torch\n" + f"{space_indent}import triton\n{space_indent}import triton.language as tl\n{space_indent}import torch\n" ) for kernel_node in node.kernels: diff --git a/orttraining/orttraining/python/training/ort_triton/_common.py b/orttraining/orttraining/python/training/ort_triton/_common.py index a1c3d7d7e1d4f..420c02f4c4385 100644 --- a/orttraining/orttraining/python/training/ort_triton/_common.py +++ b/orttraining/orttraining/python/training/ort_triton/_common.py @@ -4,7 +4,7 @@ # -------------------------------------------------------------------------- from abc import abstractmethod -from typing import Any, Dict, List, Tuple +from typing import Any import sympy from onnx import GraphProto, NodeProto, TensorProto @@ -12,7 +12,7 @@ from ._sympy_utils import extract_shape_from_symbol from ._utils import get_attribute, get_reduce_info, next_power_of_2 -_SPECIAL_FLOATS: List[str] = ["inf", "-inf"] +_SPECIAL_FLOATS: list[str] = ["inf", "-inf"] class CodegenContext: @@ -20,8 +20,8 @@ class CodegenContext: record variable name mapping in term of IRnodes. """ - def __init__(self, var_map: Dict[str, str]): - self._var_map: Dict[str, str] = {**var_map} + def __init__(self, var_map: dict[str, str]): + self._var_map: dict[str, str] = {**var_map} # Get variable name by the node arg name in ONNX graph. def get_variable_name(self, name: str) -> str: @@ -36,7 +36,7 @@ def get_internal_variable_name(self, name: str) -> str: class CodeBuffer: def __init__(self): - self.buffer: List[str] = [] + self.buffer: list[str] = [] def __iadd__(self, other: str): self.buffer.append(other) @@ -59,7 +59,7 @@ class SymbolicDSU: """ def __init__(self): - self._dsu: Dict[sympy.Expr, sympy.Expr] = {} + self._dsu: dict[sympy.Expr, sympy.Expr] = {} def find(self, symbolic: sympy.Expr) -> sympy.Expr: if symbolic not in self._dsu: @@ -81,25 +81,25 @@ class TensorInfo: Represent a input/output tensor of a node. 
""" - def __init__(self, dtype: TensorProto.DataType, shape: List[sympy.Expr]): + def __init__(self, dtype: TensorProto.DataType, shape: list[sympy.Expr]): self._dtype: TensorProto.DataType = dtype - self._shape: List[sympy.Expr] = shape + self._shape: list[sympy.Expr] = shape @property def dtype(self) -> TensorProto.DataType: return self._dtype @property - def shape(self) -> List[sympy.Expr]: + def shape(self) -> list[sympy.Expr]: return self._shape def update_shape(self, symbolics: SymbolicDSU): self._shape = [symbolics.find(dim) if dim.is_symbol else dim for dim in self._shape] -def _infer_elementwise_shape(input_infos: List[TensorInfo], symbolics: SymbolicDSU) -> List[sympy.Expr]: +def _infer_elementwise_shape(input_infos: list[TensorInfo], symbolics: SymbolicDSU) -> list[sympy.Expr]: max_len = max([len(input_info.shape) for input_info in input_infos]) - output_shape: List[sympy.Expr] = [sympy.Integer(1)] * max_len + output_shape: list[sympy.Expr] = [sympy.Integer(1)] * max_len for input_info in input_infos: offset = max_len - len(input_info.shape) for idx, dim in enumerate(input_info.shape): @@ -112,22 +112,22 @@ def _infer_elementwise_shape(input_infos: List[TensorInfo], symbolics: SymbolicD def _infer_elementwise( - node: NodeProto, input_infos: List[TensorInfo], graph: GraphProto, symbolics: SymbolicDSU -) -> List[TensorInfo]: + node: NodeProto, input_infos: list[TensorInfo], graph: GraphProto, symbolics: SymbolicDSU +) -> list[TensorInfo]: # pylint: disable=unused-argument return [TensorInfo(input_infos[0].dtype, _infer_elementwise_shape(input_infos, symbolics))] def _infer_where( - node: NodeProto, input_infos: List[TensorInfo], graph: GraphProto, symbolics: SymbolicDSU -) -> List[TensorInfo]: + node: NodeProto, input_infos: list[TensorInfo], graph: GraphProto, symbolics: SymbolicDSU +) -> list[TensorInfo]: # pylint: disable=unused-argument return [TensorInfo(input_infos[1].dtype, _infer_elementwise_shape(input_infos, symbolics))] def _infer_reduction( - node: NodeProto, input_infos: List[TensorInfo], graph: GraphProto, symbolics: SymbolicDSU -) -> List[TensorInfo]: + node: NodeProto, input_infos: list[TensorInfo], graph: GraphProto, symbolics: SymbolicDSU +) -> list[TensorInfo]: # pylint: disable=unused-argument input_rank = len(input_infos[0].shape) keep_dims, axes = get_reduce_info(node, graph, input_rank) @@ -141,15 +141,15 @@ def _infer_reduction( def _infer_unary( - node: NodeProto, input_infos: List[TensorInfo], graph: GraphProto, symbolics: SymbolicDSU -) -> List[TensorInfo]: + node: NodeProto, input_infos: list[TensorInfo], graph: GraphProto, symbolics: SymbolicDSU +) -> list[TensorInfo]: # pylint: disable=unused-argument return [input_infos[0]] def _infer_cast( - node: NodeProto, input_infos: List[TensorInfo], graph: GraphProto, symbolics: SymbolicDSU -) -> List[TensorInfo]: + node: NodeProto, input_infos: list[TensorInfo], graph: GraphProto, symbolics: SymbolicDSU +) -> list[TensorInfo]: # pylint: disable=unused-argument dtype = get_attribute(node, "to", TensorProto.UNDEFINED) assert dtype != TensorProto.UNDEFINED @@ -157,8 +157,8 @@ def _infer_cast( def _infer_dropout( - node: NodeProto, input_infos: List[TensorInfo], graph: GraphProto, symbolics: SymbolicDSU -) -> List[TensorInfo]: + node: NodeProto, input_infos: list[TensorInfo], graph: GraphProto, symbolics: SymbolicDSU +) -> list[TensorInfo]: # pylint: disable=unused-argument return [input_infos[0], TensorInfo(TensorProto.BOOL, input_infos[0].shape)] @@ -190,8 +190,8 @@ class TypeAndShapeInfer: @classmethod def 
infer( - cls, node: NodeProto, input_infos: List[TensorInfo], graph: GraphProto, symbolics: SymbolicDSU - ) -> List[TensorInfo]: + cls, node: NodeProto, input_infos: list[TensorInfo], graph: GraphProto, symbolics: SymbolicDSU + ) -> list[TensorInfo]: if node.op_type not in cls._INFER_FUNC_MAP: raise NotImplementedError(f"Unsupported op type: {node.op_type}") return cls._INFER_FUNC_MAP[node.op_type](node, input_infos, graph, symbolics) @@ -224,7 +224,7 @@ def __init__(self, x_numel: sympy.Expr, r_numel: sympy.Expr, contiguous: bool): ) ) ) - self.configs: List[Tuple[int, int, int]] = self._gen_autotune_configs(x_numel_int, r_numel_int, contiguous) + self.configs: list[tuple[int, int, int]] = self._gen_autotune_configs(x_numel_int, r_numel_int, contiguous) # If there is symbolic shape, we will not tune the kernel. if not x_numel.is_number or not r_numel.is_number: self.configs = self.configs[-1:] @@ -233,13 +233,13 @@ def __init__(self, x_numel: sympy.Expr, r_numel: sympy.Expr, contiguous: bool): def _num_warps(self, x: int, r: int) -> int: return min(max(x * r // 256, 2), 8) - def _gen_config(self, xnp2: int, rnp2: int, x: int, r: int) -> Tuple[int, int, int]: + def _gen_config(self, xnp2: int, rnp2: int, x: int, r: int) -> tuple[int, int, int]: x = min(x, xnp2) r = min(r, rnp2) return x, r, self._num_warps(x, r) # TODO: we need to tune more kernels to get more reasonable configs for better performance. - def _gen_autotune_configs(self, x_numel: int, r_numel: int, contiguous: bool) -> List[Tuple[int, int, int]]: + def _gen_autotune_configs(self, x_numel: int, r_numel: int, contiguous: bool) -> list[tuple[int, int, int]]: configs = [] xnp2 = next_power_of_2(x_numel) if r_numel == 1: diff --git a/orttraining/orttraining/python/training/ort_triton/_decompose.py b/orttraining/orttraining/python/training/ort_triton/_decompose.py index c1ded3975d3a6..e8d4a41c8697d 100644 --- a/orttraining/orttraining/python/training/ort_triton/_decompose.py +++ b/orttraining/orttraining/python/training/ort_triton/_decompose.py @@ -8,8 +8,6 @@ "simple ops" can be executed in one pass """ -from typing import List - import sympy from onnx import GraphProto, NodeProto, TensorProto, helper @@ -30,7 +28,7 @@ class DecomposeDispatch: def __init__(self): self.count = 0 - def __call__(self, node: NodeProto, graph: GraphProto, **kwargs) -> List[NodeProto]: + def __call__(self, node: NodeProto, graph: GraphProto, **kwargs) -> list[NodeProto]: op_type = node.op_type if not hasattr(self, op_type): raise NotImplementedError(f"Not implemented for op type: {op_type}") @@ -65,7 +63,7 @@ def _decompose_elementwise_precision(self, node: NodeProto, **kwargs): node_name = node.name y = node.output[0] op_type = node.op_type - inputs = [input for input in node.input] + inputs = list(node.input) cast_nodes = [] for idx, input in enumerate(inputs): dtype, _ = self._get_dtype_and_shape(input, **kwargs) diff --git a/orttraining/orttraining/python/training/ort_triton/_ir.py b/orttraining/orttraining/python/training/ort_triton/_ir.py index 23abb082c2217..fdfd308b45cbd 100644 --- a/orttraining/orttraining/python/training/ort_triton/_ir.py +++ b/orttraining/orttraining/python/training/ort_triton/_ir.py @@ -5,7 +5,7 @@ from abc import abstractmethod from collections import defaultdict -from typing import Any, Dict, List, Optional, Set, Tuple +from typing import Any import sympy import torch @@ -22,16 +22,16 @@ class TensorArg: If it's constant (initializer or constant node), it also contains the data in numpy array. 
""" - def __init__(self, name: str, tensor_info: Optional[TensorInfo] = None, data: Optional[torch.Tensor] = None): + def __init__(self, name: str, tensor_info: TensorInfo | None = None, data: torch.Tensor | None = None): self._name: str = name - self._data: Optional[torch.Tensor] = data + self._data: torch.Tensor | None = data if data is not None: self._dtype: torch.dtype = data.dtype - self._shape: List[sympy.Expr] = parse_shape(list(data.shape)) + self._shape: list[sympy.Expr] = parse_shape(list(data.shape)) else: assert tensor_info is not None self._dtype: torch.dtype = to_torch_dtype(tensor_info.dtype) - self._shape: List[sympy.Expr] = tensor_info.shape + self._shape: list[sympy.Expr] = tensor_info.shape self.cross_kernels: bool = False @property @@ -43,11 +43,11 @@ def dtype(self) -> torch.dtype: return self._dtype @property - def shape(self) -> List[sympy.Expr]: + def shape(self) -> list[sympy.Expr]: return self._shape @property - def data(self) -> Optional[torch.Tensor]: + def data(self) -> torch.Tensor | None: return self._data @@ -61,18 +61,18 @@ class OffsetCalculator: If a reduce node has non-contiguous axes, need to decompose it into multiple reduce nodes before code-gen. """ - def __init__(self, target_shape: List[sympy.Expr], reduce_axes: List[int]): - self.target_shape: List[sympy.Expr] = target_shape + def __init__(self, target_shape: list[sympy.Expr], reduce_axes: list[int]): + self.target_shape: list[sympy.Expr] = target_shape self.is_reduction: bool = len(reduce_axes) > 0 self.rank = len(target_shape) self.reduce_axes = sort_reduce_axes(reduce_axes, self.rank) - self.x_dims: List[sympy.Expr] = [target_shape[dim] for dim in range(self.rank) if dim not in self.reduce_axes] + self.x_dims: list[sympy.Expr] = [target_shape[dim] for dim in range(self.rank) if dim not in self.reduce_axes] self.x_rank: int = len(self.x_dims) self.x_numel: sympy.Expr = sympy.prod(self.x_dims) if self.x_rank > 0 else sympy.Integer(1) - self.r_dims: List[sympy.Expr] = [target_shape[dim] for dim in self.reduce_axes] + self.r_dims: list[sympy.Expr] = [target_shape[dim] for dim in self.reduce_axes] self.r_rank: int = len(self.r_dims) self.r_numel: sympy.Expr = sympy.prod(self.r_dims) if self.r_rank > 0 else sympy.Integer(1) - self.x_strides: List[sympy.Expr] = [] + self.x_strides: list[sympy.Expr] = [] if self.x_rank > 0: self.x_strides.append(sympy.Integer(1)) for i in range(self.x_rank - 2, -1, -1): @@ -80,14 +80,14 @@ def __init__(self, target_shape: List[sympy.Expr], reduce_axes: List[int]): # To avoid generating useless code for offset calculation, we use x_compute_dims and r_compute_dims to # track the dimensions that need to be computed in the offset calculation. These 2 sets will be set in # register_tensor_arg function below. 
- self.x_compute_dims: Set[int] = set() - self.r_strides: List[sympy.Expr] = [] + self.x_compute_dims: set[int] = set() + self.r_strides: list[sympy.Expr] = [] if self.r_rank > 0: self.r_strides.append(sympy.Integer(1)) for i in range(self.r_rank - 2, -1, -1): self.r_strides.insert(0, self.r_strides[0] * self.r_dims[i + 1]) - self.r_compute_dims: Set[int] = set() - self.input_strides: Dict[str, List[sympy.Expr]] = dict() + self.r_compute_dims: set[int] = set() + self.input_strides: dict[str, list[sympy.Expr]] = {} self.autotune_configs: AutotuneConfigs = AutotuneConfigs( self.x_numel, self.r_numel, not self.is_reduction or self.reduce_axes[-1] == self.rank - 1 ) @@ -99,17 +99,17 @@ def __init__(self, target_shape: List[sympy.Expr], reduce_axes: List[int]): self.requires_r_mask: bool = any( simplified_r_numel % sympy.Integer(config[1]) != 0 for config in self.autotune_configs.configs ) - self.reduced_args: Set[str] = set() - self.symbolic_shape_variables: Set[str] = set() + self.reduced_args: set[str] = set() + self.symbolic_shape_variables: set[str] = set() - def get_input_strides(self, name: str) -> List[sympy.Expr]: + def get_input_strides(self, name: str) -> list[sympy.Expr]: assert name in self.input_strides return self.input_strides[name] - def get_x_input_strides(self, name: str) -> List[sympy.Expr]: + def get_x_input_strides(self, name: str) -> list[sympy.Expr]: return [dim for idx, dim in enumerate(self.get_input_strides(name)) if idx not in self.reduce_axes] - def get_r_input_strides(self, name: str) -> List[sympy.Expr]: + def get_r_input_strides(self, name: str) -> list[sympy.Expr]: return [dim for idx, dim in enumerate(self.get_input_strides(name)) if idx in self.reduce_axes] # Whether the x shape of the tensor argument is contiguous and is same as the target shape. @@ -195,9 +195,9 @@ class IRNode: The base class for all IR nodes. 
""" - def __init__(self, inputs: List[TensorArg], outputs: List[TensorArg]): - self.inputs: List[TensorArg] = inputs - self.outputs: List[TensorArg] = outputs + def __init__(self, inputs: list[TensorArg], outputs: list[TensorArg]): + self.inputs: list[TensorArg] = inputs + self.outputs: list[TensorArg] = outputs @abstractmethod def codegen(self, visitor: NodeVisitor, context: CodegenContext, code_buffer: CodeBuffer, indent: int = 0): @@ -212,13 +212,13 @@ class ComputeNode(IRNode): def __init__( self, op_type: str, - inputs: List[TensorArg], - outputs: List[TensorArg], - attributes: Dict[str, Any] = {}, # noqa: B006 + inputs: list[TensorArg], + outputs: list[TensorArg], + attributes: dict[str, Any] = {}, # noqa: B006 ): super().__init__(inputs, outputs) self._op_type: str = op_type - self._attributes: Dict[str, Any] = attributes + self._attributes: dict[str, Any] = attributes @property def op_type(self): @@ -230,7 +230,7 @@ def attributes(self): class ReduceNode(ComputeNode): - def __init__(self, op_type: str, inputs: List[TensorArg], outputs: List[TensorArg], offset_calc: OffsetCalculator): + def __init__(self, op_type: str, inputs: list[TensorArg], outputs: list[TensorArg], offset_calc: OffsetCalculator): super().__init__(op_type, inputs, outputs) assert op_type == "ReduceSum" or op_type == "ReduceMax" or op_type == "ReduceMin" self.default_value: str = ( @@ -250,9 +250,9 @@ class ReduceForLoopStart(ComputeNode): shared-memory declaration """ - def __init__(self, reduce_nodes: List[ReduceNode], offset_calc: OffsetCalculator): + def __init__(self, reduce_nodes: list[ReduceNode], offset_calc: OffsetCalculator): super().__init__("", [], []) - self.reduce_nodes: List[ReduceNode] = reduce_nodes + self.reduce_nodes: list[ReduceNode] = reduce_nodes self.offset_calc: OffsetCalculator = offset_calc @@ -261,9 +261,9 @@ class ReduceForLoopEnd(ComputeNode): shared-memory reduction """ - def __init__(self, reduce_nodes: List[ReduceNode], offset_calc: OffsetCalculator): + def __init__(self, reduce_nodes: list[ReduceNode], offset_calc: OffsetCalculator): super().__init__("", [], []) - self.reduce_nodes: List[ReduceNode] = reduce_nodes + self.reduce_nodes: list[ReduceNode] = reduce_nodes self.offset_calc: OffsetCalculator = offset_calc @@ -273,7 +273,7 @@ class DropoutNode(ComputeNode): if there are more than one dropout operators in the subgraph. 
""" - def __init__(self, inputs: List[TensorArg], outputs: List[TensorArg], offset_calc: OffsetCalculator): + def __init__(self, inputs: list[TensorArg], outputs: list[TensorArg], offset_calc: OffsetCalculator): super().__init__("Dropout", inputs, outputs) self.offset_calc: OffsetCalculator = offset_calc self.offset_calc.register_tensor_arg(inputs[0]) @@ -301,14 +301,14 @@ class KernelNode(IRNode): """ - def __init__(self, inputs: List[TensorArg], outputs: List[TensorArg], target_shape: List, reduce_axes: List[int]): + def __init__(self, inputs: list[TensorArg], outputs: list[TensorArg], target_shape: list, reduce_axes: list[int]): super().__init__(inputs, outputs) self.name: str = gen_unique_name("triton") - self.internal_args: Set[str] = set() - self.constants: Dict[str, TensorArg] = dict() - self.target_shape: List[sympy.Expr] = target_shape - self.sub_nodes: List[IRNode] = [] - self.var_map: Dict[str, str] = dict() + self.internal_args: set[str] = set() + self.constants: dict[str, TensorArg] = {} + self.target_shape: list[sympy.Expr] = target_shape + self.sub_nodes: list[IRNode] = [] + self.var_map: dict[str, str] = {} self.has_dropout: bool = False self.offset_calc: OffsetCalculator = OffsetCalculator(target_shape, reduce_axes) @@ -335,18 +335,18 @@ def gen_variable_names(self): class ElementwiseKernelNode(KernelNode): - def __init__(self, inputs: List[TensorArg], outputs: List[TensorArg], target_shape: List[sympy.Expr]): + def __init__(self, inputs: list[TensorArg], outputs: list[TensorArg], target_shape: list[sympy.Expr]): super().__init__(inputs, outputs, target_shape, []) class ReduceKernelNode(KernelNode): def __init__( self, - inputs: List[TensorArg], - outputs: List[TensorArg], - target_shape: List[sympy.Expr], - reduce_axes: List[int], - reduced_args: Set[str], + inputs: list[TensorArg], + outputs: list[TensorArg], + target_shape: list[sympy.Expr], + reduce_axes: list[int], + reduced_args: set[str], ): super().__init__(inputs, outputs, target_shape, reduce_axes) self.offset_calc.reduced_args.update(reduced_args) @@ -361,18 +361,18 @@ class ModuleNode(IRNode): def __init__( self, func_name: str, - inputs: List[TensorArg], - outputs: List[TensorArg], - constants: List[TensorArg], - cross_kernel_args: List[Tuple[TensorArg, int]], - kernels: List[KernelNode], + inputs: list[TensorArg], + outputs: list[TensorArg], + constants: list[TensorArg], + cross_kernel_args: list[tuple[TensorArg, int]], + kernels: list[KernelNode], ): super().__init__(inputs, outputs) self.func_name: str = func_name # Currently need inputs and outputs only. May need intermediate vars and constants later. 
- self.constants: List[TensorArg] = constants - self.kernels: List[KernelNode] = kernels - self.var_map: Dict[str, str] = dict() + self.constants: list[TensorArg] = constants + self.kernels: list[KernelNode] = kernels + self.var_map: dict[str, str] = {} existing_names = set() for input in self.inputs: name = gen_variable_name(input.name, "in", existing_names) @@ -380,7 +380,7 @@ def __init__( for output in self.outputs: name = gen_variable_name(output.name, "out", existing_names) self.var_map[output.name] = name - self.cross_kernel_args_to_delete: Dict[int, Set[str]] = defaultdict(set) + self.cross_kernel_args_to_delete: dict[int, set[str]] = defaultdict(set) for pair in cross_kernel_args: name = gen_variable_name(pair[0].name, "buf", existing_names) self.cross_kernel_args_to_delete[pair[1]].add(name) diff --git a/orttraining/orttraining/python/training/ort_triton/_lowering.py b/orttraining/orttraining/python/training/ort_triton/_lowering.py index 7253c7935a650..c5f1b96273a69 100644 --- a/orttraining/orttraining/python/training/ort_triton/_lowering.py +++ b/orttraining/orttraining/python/training/ort_triton/_lowering.py @@ -6,7 +6,7 @@ import itertools import warnings from collections import defaultdict -from typing import Any, Dict, List, Set, Tuple +from typing import Any import sympy from onnx import NodeProto, helper @@ -37,31 +37,31 @@ class NodeGroup: """ - def __init__(self, node: NodeProto, reduce_axes: List[int], keep_dims: int, node_arg_infos: Dict[str, TensorInfo]): + def __init__(self, node: NodeProto, reduce_axes: list[int], keep_dims: int, node_arg_infos: dict[str, TensorInfo]): self._node_arg_infos = node_arg_infos - self.nodes_groups: List[Any] = [node] - self.target_shape: List[sympy.Expr] = self._get_target_shape(node) + self.nodes_groups: list[Any] = [node] + self.target_shape: list[sympy.Expr] = self._get_target_shape(node) rank = len(self.target_shape) - self.reduce_axes: List[int] = sort_reduce_axes(reduce_axes, rank) + self.reduce_axes: list[int] = sort_reduce_axes(reduce_axes, rank) x_dims = [self.target_shape[dim] for dim in range(rank) if dim not in self.reduce_axes] # x_numel is meant to hint how many rows of tensor will be processed by each kernel. # x is same as CUDA block in X direction. x_numel: sympy.Expr = sympy.prod(x_dims) if len(x_dims) > 0 else sympy.Integer(1) - r_dims: List[sympy.Expr] = [self.target_shape[dim] for dim in self.reduce_axes] + r_dims: list[sympy.Expr] = [self.target_shape[dim] for dim in self.reduce_axes] # r_numel is meant to hint how many elements in a row of tensor will be processed by each kernel. # r is a abbreviation of reduction, so, it's only used for reduction nodes. r_numel: sympy.Expr = sympy.prod(r_dims) if len(r_dims) > 0 else sympy.Integer(1) self.autotune_configs: AutotuneConfigs = AutotuneConfigs( x_numel, r_numel, len(self.reduce_axes) == 0 or self.reduce_axes[-1] == rank - 1 ) - self.reduced_args: Set[str] = set() + self.reduced_args: set[str] = set() if keep_dims != 1: self.reduced_args.add(node.output[0]) # Check if shape can be broadcasted to target_shape. # For example, [1, 3, 1, 1] can be broadcasted to [1, 3, 5, 7]. # and we support `keepdims = false``, so [1, 3, 5, 7] is compatible with [1, 3, 5]. 
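The broadcast-compatibility rule that the comment above describes fits in a few lines. A hedged sketch of a hypothetical right-aligned check (standard NumPy-style broadcasting only; it does not model the keepdims=False special case the class also accepts):

def broadcastable(shape, target):
    # Align trailing dimensions; each dim must equal the target dim or be 1.
    if len(shape) > len(target):
        return False
    for dim, tgt in zip(reversed(shape), reversed(target), strict=False):
        if dim != 1 and dim != tgt:
            return False
    return True

assert broadcastable([1, 3, 1, 1], [1, 3, 5, 7])
assert not broadcastable([2, 3], [1, 3, 5, 7])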
- def _compatible_shape(self, shape: List[sympy.Expr], split_if_different: bool) -> bool: + def _compatible_shape(self, shape: list[sympy.Expr], split_if_different: bool) -> bool: if split_if_different: return shape == self.target_shape if len(shape) > len(self.target_shape): @@ -88,7 +88,7 @@ def _get_target_shape(self, node): # 2. The target shape of a group is determined by the first node in the group. # we call it dominators, and it determinate the partition strategy of X_numel/R_numel. # A group can't have multiple dominators. - def compatible(self, node: NodeProto, reduce_axes: List[int], keep_dims: int, split_if_different: bool) -> bool: + def compatible(self, node: NodeProto, reduce_axes: list[int], keep_dims: int, split_if_different: bool) -> bool: target_shape = self._get_target_shape(node) if is_reduction_node(node): # If the following nodes are all elementwise nodes on reduce output shape. @@ -105,7 +105,7 @@ def compatible(self, node: NodeProto, reduce_axes: List[int], keep_dims: int, sp # 1. Create a new group with the reduction node. # 2. Add this node to the current group. - def add_node(self, node: NodeProto, reduce_axes: List[int], keep_dims: int): + def add_node(self, node: NodeProto, reduce_axes: list[int], keep_dims: int): if is_reduction_node(node): group = NodeGroup(node, reduce_axes, keep_dims, self._node_arg_infos) self.nodes_groups.append(group) @@ -142,7 +142,7 @@ def dependent_nodes(self, keep_reduce_node: bool): return node_map, reduce_nodes # finalize the group, and return the flatten nodes - def flatten(self, sorted_nodes: List[NodeProto]) -> Tuple[List[NodeProto], List[List[int]]]: + def flatten(self, sorted_nodes: list[NodeProto]) -> tuple[list[NodeProto], list[list[int]]]: if self.autotune_configs.requires_for_loop: layers = [] group_layer = [self] @@ -193,12 +193,12 @@ class KernelIO: """ def __init__(self): - self.module_inputs: List[str] = [] - self.cross_kernel_inputs: List[str] = [] - self.constants: List[str] = [] - self.module_outputs: List[str] = [] - self.cross_kernel_outputs: List[str] = [] - self.internal_args: List[str] = [] + self.module_inputs: list[str] = [] + self.cross_kernel_inputs: list[str] = [] + self.constants: list[str] = [] + self.module_outputs: list[str] = [] + self.cross_kernel_outputs: list[str] = [] + self.internal_args: list[str] = [] class GraphLowering: @@ -217,51 +217,50 @@ class GraphLowering: def __init__(self, sorted_graph: SortedGraph): self._sorted_graph: SortedGraph = sorted_graph - self._node_arg_infos: Dict[str, TensorInfo] = sorted_graph.node_arg_infos - self._module_inputs: List[TensorArg] = [] - self._module_outputs: List[TensorArg] = [] - self._module_constants: List[TensorArg] = [] - self._module_input_names: Set[str] = set() - self._module_output_names: Set[str] = set() - self._module_constant_names: Set[str] = set() - self._tensor_args: Dict[str, TensorArg] = {} + self._node_arg_infos: dict[str, TensorInfo] = sorted_graph.node_arg_infos + self._module_inputs: list[TensorArg] = [] + self._module_outputs: list[TensorArg] = [] + self._module_constants: list[TensorArg] = [] + self._module_input_names: set[str] = set() + self._module_output_names: set[str] = set() + self._module_constant_names: set[str] = set() + self._tensor_args: dict[str, TensorArg] = {} # Extract module inputs, outputs and constants. self._extract_module_io() # Group nodes into NodeGroups, each NodeGroup represents a kernel. 
- self._groups: List[NodeGroup] = [] + self._groups: list[NodeGroup] = [] self._group_nodes() # Convert NodeGroups to KernelNodes. - self._kernel_nodes: List[KernelNode] = [] - self._kernel_io_list: List[KernelIO] = [] + self._kernel_nodes: list[KernelNode] = [] + self._kernel_io_list: list[KernelIO] = [] self._lower() # A module is map to a real onnx graph. def _extract_module_io(self): graph = self._sorted_graph.original_graph self._module_inputs = [TensorArg(input.name, self._node_arg_infos[input.name]) for input in graph.input] - self._module_input_names = set(arg.name for arg in self._module_inputs) + self._module_input_names = {arg.name for arg in self._module_inputs} self._module_outputs = [TensorArg(output.name, self._node_arg_infos[output.name]) for output in graph.output] - self._module_output_names = set(arg.name for arg in self._module_outputs) + self._module_output_names = {arg.name for arg in self._module_outputs} for initializer in graph.initializer: data = to_torch_tensor(initializer) self._module_constants.append(TensorArg(initializer.name, data=data)) for const_node in self._sorted_graph.const_nodes: data = to_torch_tensor(const_node) self._module_constants.append(TensorArg(const_node.output[0], data=data)) - self._module_constant_names = set(arg.name for arg in self._module_constants) - self._tensor_args = dict( - (arg.name, arg) - for arg in itertools.chain(self._module_inputs, self._module_outputs, self._module_constants) - ) + self._module_constant_names = {arg.name for arg in self._module_constants} + self._tensor_args = { + arg.name: arg for arg in itertools.chain(self._module_inputs, self._module_outputs, self._module_constants) + } - def _get_reduce_info(self, node) -> Tuple[int, List[int]]: + def _get_reduce_info(self, node) -> tuple[int, list[int]]: assert is_reduction_node(node) input_rank = len(self._node_arg_infos[node.input[0]].shape) return get_reduce_info(node, self._sorted_graph.original_graph, input_rank) - def _process_node(self, node: NodeProto, precessors: Dict[str, List[NodeProto]], group: NodeGroup): + def _process_node(self, node: NodeProto, precessors: dict[str, list[NodeProto]], group: NodeGroup): dependent_nodes = set() dependent_nodes.add(node.name) for precessor in precessors[node.name]: @@ -328,7 +327,7 @@ def _group_nodes(self): self._groups.append(group_i) flag.add(i) - def _get_node_io(self, node: NodeProto) -> Tuple[List[TensorArg], List[TensorArg]]: + def _get_node_io(self, node: NodeProto) -> tuple[list[TensorArg], list[TensorArg]]: input_args = [] for input in node.input: if input in self._tensor_args: @@ -345,7 +344,7 @@ def _get_node_io(self, node: NodeProto) -> Tuple[List[TensorArg], List[TensorArg self._tensor_args[output] = output_args[-1] return input_args, output_args - def _extract_kernel_io(self, nodes: List[NodeProto]) -> KernelIO: + def _extract_kernel_io(self, nodes: list[NodeProto]) -> KernelIO: kernel_io = KernelIO() input_set = set() output_set = set() diff --git a/orttraining/orttraining/python/training/ort_triton/_sorted_graph.py b/orttraining/orttraining/python/training/ort_triton/_sorted_graph.py index d67a1c1665200..722f05dfdf493 100644 --- a/orttraining/orttraining/python/training/ort_triton/_sorted_graph.py +++ b/orttraining/orttraining/python/training/ort_triton/_sorted_graph.py @@ -5,7 +5,6 @@ import copy import itertools -from typing import Dict, List, Set import onnx import sympy @@ -30,14 +29,14 @@ class SortedGraph: input_shapes: the shapes of the model inputs. Can be numeric values or symbolic values. 
""" - def __init__(self, model: ModelProto, input_shapes: List[List[sympy.Expr]]): + def __init__(self, model: ModelProto, input_shapes: list[list[sympy.Expr]]): self._model: ModelProto = model self._graph: GraphProto = model.graph - self._input_shapes: List[List[sympy.Expr]] = input_shapes + self._input_shapes: list[list[sympy.Expr]] = input_shapes # For elementwise graph outputs, when we group nodes to different kernels, if the target shape is different # from other nodes' target shape, even it can be broadcasted, we still need to create a new kernel for it. - self._elementwise_graph_outputs: Set[str] = set() + self._elementwise_graph_outputs: set[str] = set() graph_output_names = [output.name for output in self._graph.output] for node in self._graph.node: if is_elementwise_node(node): @@ -46,12 +45,12 @@ def __init__(self, model: ModelProto, input_shapes: List[List[sympy.Expr]]): ) # Topological sort the nodes in the graph. - self._sorted_nodes: List[NodeProto] = topological_sort( + self._sorted_nodes: list[NodeProto] = topological_sort( [input.name for input in self._graph.input] + [initializer.name for initializer in self._graph.initializer], self._graph.node, ) - self._node_arg_infos: Dict[str, TensorInfo] = {} + self._node_arg_infos: dict[str, TensorInfo] = {} for idx, input in enumerate(self._graph.input): self._node_arg_infos[input.name] = TensorInfo(input.type.tensor_type.elem_type, self._input_shapes[idx]) for initializer in self._graph.initializer: @@ -70,7 +69,7 @@ def __init__(self, model: ModelProto, input_shapes: List[List[sympy.Expr]]): initializers = {} for initializer in self._graph.initializer: initializers[initializer.name] = initializer - self._sorted_initializers: List[TensorProto] = [] + self._sorted_initializers: list[TensorProto] = [] for node in self._sorted_nodes: for input in node.input: if input in initializers: @@ -78,8 +77,8 @@ def __init__(self, model: ModelProto, input_shapes: List[List[sympy.Expr]]): initializers.pop(input) # Split nodes to constant nodes and non-constant nodes. 
- self._const_nodes: List[NodeProto] = [node for node in self._sorted_nodes if node.op_type == "Constant"] - self._sorted_nodes: List[NodeProto] = [node for node in self._sorted_nodes if node.op_type != "Constant"] + self._const_nodes: list[NodeProto] = [node for node in self._sorted_nodes if node.op_type == "Constant"] + self._sorted_nodes: list[NodeProto] = [node for node in self._sorted_nodes if node.op_type != "Constant"] def __str__(self): """ @@ -140,11 +139,11 @@ def __eq__(self, other): return str(self) == str(other) @property - def const_nodes(self) -> List[NodeProto]: + def const_nodes(self) -> list[NodeProto]: return self._const_nodes @property - def sorted_nodes(self) -> List[NodeProto]: + def sorted_nodes(self) -> list[NodeProto]: return self._sorted_nodes @property @@ -152,11 +151,11 @@ def original_graph(self) -> GraphProto: return self._graph @property - def node_arg_infos(self) -> Dict[str, TensorInfo]: + def node_arg_infos(self) -> dict[str, TensorInfo]: return self._node_arg_infos @property - def elementwise_graph_outputs(self) -> Set[str]: + def elementwise_graph_outputs(self) -> set[str]: return self._elementwise_graph_outputs def _decompose(self): diff --git a/orttraining/orttraining/python/training/ort_triton/_sympy_utils.py b/orttraining/orttraining/python/training/ort_triton/_sympy_utils.py index a4a384c021fe8..1df587fda054e 100644 --- a/orttraining/orttraining/python/training/ort_triton/_sympy_utils.py +++ b/orttraining/orttraining/python/training/ort_triton/_sympy_utils.py @@ -4,7 +4,7 @@ # -------------------------------------------------------------------------- import re -from typing import Any, List +from typing import Any import sympy @@ -15,12 +15,12 @@ def extract_shape_from_symbol(symbol: str) -> int: return int(match.group(3)) -def sympy_dot(seq1: List[sympy.Expr], seq2: List[sympy.Expr]) -> sympy.Expr: +def sympy_dot(seq1: list[sympy.Expr], seq2: list[sympy.Expr]) -> sympy.Expr: assert len(seq1) == len(seq2) - return sympy.expand(sum(a * b for a, b in zip(seq1, seq2))) + return sympy.expand(sum(a * b for a, b in zip(seq1, seq2, strict=False))) -def parse_shape(shape: List[Any]) -> List[sympy.Expr]: +def parse_shape(shape: list[Any]) -> list[sympy.Expr]: symbol_shapes = [] for dim in shape: symbol_dim = dim diff --git a/orttraining/orttraining/python/training/ort_triton/_utils.py b/orttraining/orttraining/python/training/ort_triton/_utils.py index e39a668bd0066..3cf5cfa184861 100644 --- a/orttraining/orttraining/python/training/ort_triton/_utils.py +++ b/orttraining/orttraining/python/training/ort_triton/_utils.py @@ -6,7 +6,7 @@ import re import uuid from collections import defaultdict -from typing import Any, List, Tuple +from typing import Any import numpy as np import torch @@ -27,7 +27,7 @@ def _topological_sort_internal(node, visited, output_consumers, sorted_nodes): # Topological sort of nodes given the input names. The list of nodes contain both constant and non-constant nodes. 
-def topological_sort(inputs: List[str], nodes: List[NodeProto]) -> List[NodeProto]: +def topological_sort(inputs: list[str], nodes: list[NodeProto]) -> list[NodeProto]: const_nodes = [] non_const_nodes = [] for node in nodes: @@ -119,7 +119,7 @@ def may_add_brackets(name: str) -> str: return name -def sort_reduce_axes(axes: List[int], rank: int, check_contiguous: bool = True) -> List[int]: +def sort_reduce_axes(axes: list[int], rank: int, check_contiguous: bool = True) -> list[int]: axes = [axis + rank if axis < 0 else axis for axis in axes] axes.sort() if check_contiguous: @@ -129,7 +129,7 @@ def sort_reduce_axes(axes: List[int], rank: int, check_contiguous: bool = True) # Get the keep_dims attribute and reduce axes from a reduce node. -def get_reduce_info(node: NodeProto, graph: GraphProto, input_rank: int) -> Tuple[int, List[int]]: +def get_reduce_info(node: NodeProto, graph: GraphProto, input_rank: int) -> tuple[int, list[int]]: keep_dims = get_attribute(node, "keepdims", 1) noop_with_empty_axes = get_attribute(node, "noop_with_empty_axes", 0) axes = get_attribute(node, "axes", None) diff --git a/orttraining/orttraining/python/training/ort_triton/kernel/_flash_attn.py b/orttraining/orttraining/python/training/ort_triton/kernel/_flash_attn.py index f7b7c1ff08300..67394fe297d51 100644 --- a/orttraining/orttraining/python/training/ort_triton/kernel/_flash_attn.py +++ b/orttraining/orttraining/python/training/ort_triton/kernel/_flash_attn.py @@ -40,7 +40,6 @@ """ import math -from typing import List, Tuple import torch import triton @@ -793,7 +792,7 @@ def flash_attn_forward(q, k, v, bias=None, **kwargs): elif bias.shape[2:] == (seqlen_q, seqlen_k): bias_type = "matrix" else: - raise RuntimeError("Last 2 dimensions of bias must be (1, seqlen_k)" " or (seqlen_q, seqlen_k)") + raise RuntimeError("Last 2 dimensions of bias must be (1, seqlen_k) or (seqlen_q, seqlen_k)") bias = bias.expand(batch, nheads, seqlen_q, seqlen_k) bias_strides = (bias.stride(0), bias.stride(1), bias.stride(2)) if has_bias else (0, 0, 0) @@ -903,7 +902,7 @@ def flash_attn_backward(do, q, k, v, o, lse, bias=None, **kwargs): elif bias.shape[2:] == (seqlen_q, seqlen_k): bias_type = "matrix" else: - raise RuntimeError("Last 2 dimensions of bias must be (1, seqlen_k)" " or (seqlen_q, seqlen_k)") + raise RuntimeError("Last 2 dimensions of bias must be (1, seqlen_k) or (seqlen_q, seqlen_k)") bias = bias.expand(batch, nheads, seqlen_q, seqlen_k) bias_strides = (bias.stride(0), bias.stride(1), bias.stride(2)) if has_bias else (0, 0, 0) @@ -1009,7 +1008,7 @@ def _make_flash_attention_nodes( # Without causal mask, without Dropout. For example, BERT model in HuggingFace. -_PATTERN_0: List[Tuple[str, bool, List[Tuple[int, int, int]]]] = [ +_PATTERN_0: list[tuple[str, bool, list[tuple[int, int, int]]]] = [ ("MatMul", False, []), # 0 ("Transpose", True, [(0, 0, 0)]), # 1 ("Transpose", True, [(0, 0, 1)]), # 2 @@ -1034,7 +1033,7 @@ def _make_flash_attention_nodes( ] -def _optimize_for_pattern_0(matcher: GraphMatcher, idx: int, nodes: List[NodeProto]): +def _optimize_for_pattern_0(matcher: GraphMatcher, idx: int, nodes: list[NodeProto]): # Check forward only as the backward is expected to be consistent if it's built correctly. scale_value = matcher.get_constant_value(nodes[3].input[1]) if not ( @@ -1063,7 +1062,7 @@ def _optimize_for_pattern_0(matcher: GraphMatcher, idx: int, nodes: List[NodePro # llama2+peft, k doesn't require grad. 
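The _flash_attn.py error-message edits above remove single-line implicit string concatenation, which lints as easy to misread (a stray pair of quotes mid-string looks like a missing comma). The merged literal is identical:

split = "Last 2 dimensions of bias must be (1, seqlen_k)" " or (seqlen_q, seqlen_k)"
merged = "Last 2 dimensions of bias must be (1, seqlen_k) or (seqlen_q, seqlen_k)"
assert split == merged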
-_PATTERN_1: List[Tuple[str, bool, List[Tuple[int, int, int]]]] = [ +_PATTERN_1: list[tuple[str, bool, list[tuple[int, int, int]]]] = [ ("MatMul", False, []), # 0 ("Transpose", True, [(0, 0, 1)]), # 1 ("Div", False, [(0, 0, 0)]), # 2 @@ -1087,7 +1086,7 @@ def _optimize_for_pattern_0(matcher: GraphMatcher, idx: int, nodes: List[NodePro ] -def _optimize_for_pattern_1(matcher: GraphProto, idx: int, nodes: List[NodeProto]): +def _optimize_for_pattern_1(matcher: GraphProto, idx: int, nodes: list[NodeProto]): # Check forward only as the backward is expected to be consistent if it's built correctly. scale_value = matcher.get_constant_value(nodes[2].input[1]) if not ( @@ -1138,7 +1137,7 @@ def _optimize_for_pattern_1(matcher: GraphProto, idx: int, nodes: List[NodeProto # llama2+peft, k requires grad. -_PATTERN_2: List[Tuple[str, bool, List[Tuple[int, int, int]]]] = [ +_PATTERN_2: list[tuple[str, bool, list[tuple[int, int, int]]]] = [ ("MatMul", False, []), # 0 ("Transpose", True, [(0, 0, 1)]), # 1 ("Div", False, [(0, 0, 0)]), # 2 @@ -1164,7 +1163,7 @@ def _optimize_for_pattern_1(matcher: GraphProto, idx: int, nodes: List[NodeProto ] -def _aptimize_for_pattern_2(matcher: GraphProto, idx: int, nodes: List[NodeProto]): +def _aptimize_for_pattern_2(matcher: GraphProto, idx: int, nodes: list[NodeProto]): # Check forward only as the backward is expected to be consistent if it's built correctly. scale_value = matcher.get_constant_value(nodes[2].input[1]) if not ( diff --git a/orttraining/orttraining/python/training/ort_triton/kernel/_mm.py b/orttraining/orttraining/python/training/ort_triton/kernel/_mm.py index 1a944082fa4ba..27256dcc4ac8e 100644 --- a/orttraining/orttraining/python/training/ort_triton/kernel/_mm.py +++ b/orttraining/orttraining/python/training/ort_triton/kernel/_mm.py @@ -6,7 +6,6 @@ import math import os from types import ModuleType -from typing import Tuple import torch @@ -287,21 +286,21 @@ def _mm_configs(dtype, m, n, k, trans_a, trans_b, alpha, func_name): if alpha != 1.0: post_process = f"({post_process})" post_process = f"{post_process}.to(tl.float16)" - return dict( - autotune_configs=autotune_configs, - kernel_name=f"kernel_{func_name}", - M=m, - N=n, - K=k, - stride_am=(1 if trans_a else k), - stride_ak=(m if trans_a else 1), - stride_bk=(1 if trans_b else n), - stride_bn=(k if trans_b else 1), - even_k=(k % max_bk == 0), - allow_tf32=torch.backends.cuda.matmul.allow_tf32, - post_process=post_process, - func_name=func_name, - ) + return { + "autotune_configs": autotune_configs, + "kernel_name": f"kernel_{func_name}", + "M": m, + "N": n, + "K": k, + "stride_am": (1 if trans_a else k), + "stride_ak": (m if trans_a else 1), + "stride_bk": (1 if trans_b else n), + "stride_bn": (k if trans_b else 1), + "even_k": (k % max_bk == 0), + "allow_tf32": torch.backends.cuda.matmul.allow_tf32, + "post_process": post_process, + "func_name": func_name, + } def _gen_mm_key(dtype: torch.dtype, m: int, n: int, k: int, trans_a: bool, trans_b: bool, alpha: float) -> int: @@ -310,7 +309,7 @@ def _gen_mm_key(dtype: torch.dtype, m: int, n: int, k: int, trans_a: bool, trans def _gen_mm_module( dtype: torch.dtype, m: int, n: int, k: int, trans_a: bool, trans_b: bool, alpha: float -) -> Tuple[str, ModuleType]: +) -> tuple[str, ModuleType]: func_name = gen_unique_name("mm") kwargs = _mm_configs(dtype, m, n, k, trans_a, trans_b, alpha, func_name) src_code = _MM_TEMPLATE.format(**kwargs) @@ -347,7 +346,7 @@ def _gen_gemm_module( trans_b: bool, alpha: float, beta: float, -) -> Tuple[str, ModuleType]: +) -> 
tuple[str, ModuleType]: func_name = gen_unique_name("gemm") kwargs = _mm_configs(dtype, m, n, k, trans_a, trans_b, alpha, func_name) kwargs["stride_cm"] = stride_cm @@ -369,7 +368,7 @@ def _gen_bmm_key( def _gen_bmm_module( dtype: torch.dtype, m: int, n: int, k: int, batch_a: int, batch_b: int, trans_a: bool, trans_b: bool, alpha: float -) -> Tuple[str, ModuleType]: +) -> tuple[str, ModuleType]: func_name = gen_unique_name("bmm") kwargs = _mm_configs(dtype, m, n, k, trans_a, trans_b, alpha, func_name) batch = max(batch_a, batch_b) diff --git a/orttraining/orttraining/python/training/ort_triton/triton_op_executor.py b/orttraining/orttraining/python/training/ort_triton/triton_op_executor.py index 14bc2779aa05b..4477d675f5a6b 100644 --- a/orttraining/orttraining/python/training/ort_triton/triton_op_executor.py +++ b/orttraining/orttraining/python/training/ort_triton/triton_op_executor.py @@ -9,7 +9,6 @@ import re import sys from types import ModuleType -from typing import List, Tuple, Union import onnx from onnx import ModelProto @@ -25,11 +24,11 @@ _DEBUG_MODE = "ORTMODULE_TRITON_DEBUG" in os.environ and int(os.getenv("ORTMODULE_TRITON_DEBUG")) == 1 -_CUSTOM_KERNELS = dict() +_CUSTOM_KERNELS = {} @functools.lru_cache(None) -def _gen_module_internal(sorted_graph: SortedGraph) -> Tuple[str, str, ModuleType]: +def _gen_module_internal(sorted_graph: SortedGraph) -> tuple[str, str, ModuleType]: func_name = gen_unique_name("func") src_code = codegen(func_name, sorted_graph) return func_name, src_code, PyCodeCache().load(src_code) @@ -42,7 +41,7 @@ class _ShapeCache: For those dimensions that the concrete shape is changed between different steps, we use a symbolic shape. """ - cache = dict() # noqa: RUF012 + cache = {} # noqa: RUF012 symbolic_shape_hint = None min_symbolic_shape = 0 clear = staticmethod(cache.clear) @@ -54,11 +53,11 @@ def set_symbolic_shape_hint(cls, symbolic_shape_hint_config): cls.min_symbolic_shape = v else: if cls.symbolic_shape_hint is None: - cls.symbolic_shape_hint = dict() + cls.symbolic_shape_hint = {} cls.symbolic_shape_hint[k] = v @classmethod - def get_shape(cls, onnx_key: int, model: ModelProto, shapes: List[List[int]]) -> List[List[Union[int, str]]]: + def get_shape(cls, onnx_key: int, model: ModelProto, shapes: list[list[int]]) -> list[list[int | str]]: if onnx_key not in cls.cache: if cls.symbolic_shape_hint is not None: for i, input in enumerate(model.graph.input): @@ -90,12 +89,12 @@ def get_shape(cls, onnx_key: int, model: ModelProto, shapes: List[List[int]]) -> return cls.cache[onnx_key] -def _gen_key(onnx_key: int, model: ModelProto, shapes: List[List[Union[int, str]]]) -> int: +def _gen_key(onnx_key: int, model: ModelProto, shapes: list[list[int | str]]) -> int: # pylint: disable=unused-argument return hash(f"{onnx_key}|{str(shapes).replace(' ', '')}") -def _gen_module(onnx_key: int, model: ModelProto, shapes: List[List[Union[int, str]]]) -> Tuple[str, ModuleType]: +def _gen_module(onnx_key: int, model: ModelProto, shapes: list[list[int | str]]) -> tuple[str, ModuleType]: sorted_graph = SortedGraph(model, [parse_shape(shape) for shape in shapes]) if _DEBUG_MODE: os.makedirs(os.path.dirname("triton_debug/"), exist_ok=True) @@ -123,7 +122,7 @@ def get_config() -> str: shape. Each dim_param will be replaced by i{input_index}_dim{dim_index}_{power_of_2} in the symbolic shape. 
""" - config = dict() + config = {} config_file = os.getenv("ORTMODULE_TRITON_CONFIG_FILE", "") if config_file and os.path.exists(config_file): with open(config_file, encoding="UTF-8") as f: diff --git a/orttraining/orttraining/python/training/ortmodule/_custom_autograd_function_exporter.py b/orttraining/orttraining/python/training/ortmodule/_custom_autograd_function_exporter.py index 1efc3a23eef34..9ac65bde82bf8 100644 --- a/orttraining/orttraining/python/training/ortmodule/_custom_autograd_function_exporter.py +++ b/orttraining/orttraining/python/training/ortmodule/_custom_autograd_function_exporter.py @@ -191,7 +191,6 @@ def _export_pt_1_10(g, n, *args, **kwargs): def _default_export( g, func_full_qual_name, func_class, cconv, output_size, output_tensor_types, output_tensor_ranks, *args, **kwargs ): - input_tensor_types = [] input_tensor_ranks = [] @@ -224,7 +223,7 @@ def _default_export( assert len(args) == len(cconv), "Number of arguments does not match calling convention" # Encode inputs to torch.autograd.Function. - for i, arg, call_type in zip(range(len(args)), args, cconv): + for i, arg, call_type in zip(range(len(args)), args, cconv, strict=False): if call_type == "d": # Got a tensor variable. tensor_args.append(arg) diff --git a/orttraining/orttraining/python/training/ortmodule/_custom_op_symbolic_registry.py b/orttraining/orttraining/python/training/ortmodule/_custom_op_symbolic_registry.py index 004e3540c62d6..770542dc47549 100644 --- a/orttraining/orttraining/python/training/ortmodule/_custom_op_symbolic_registry.py +++ b/orttraining/orttraining/python/training/ortmodule/_custom_op_symbolic_registry.py @@ -4,7 +4,7 @@ # -------------------------------------------------------------------------- import os -from typing import Callable +from collections.abc import Callable import torch import torch.onnx.symbolic_helper as sym_help @@ -608,7 +608,7 @@ def einsum_internal(g, equation, tensor_list): # contraction_labels = [k], contraction_axes = [2] for the example. batched_axes = [] matmul_output_axes = [] - contraction_axes = [axis for axis in range(out_size, perm_size)] + contraction_axes = list(range(out_size, perm_size)) for axis in range(out_size): label = result_labels[axis] if label in lhs_labels and label in rhs_labels: diff --git a/orttraining/orttraining/python/training/ortmodule/_execution_agent.py b/orttraining/orttraining/python/training/ortmodule/_execution_agent.py index 047cd4c59d636..8d64caeec6051 100644 --- a/orttraining/orttraining/python/training/ortmodule/_execution_agent.py +++ b/orttraining/orttraining/python/training/ortmodule/_execution_agent.py @@ -3,7 +3,6 @@ # Licensed under the MIT License. # -------------------------------------------------------------------------- -from typing import Tuple import onnxruntime from onnxruntime.capi import _pybind_state as C @@ -166,7 +165,7 @@ def run_backward(self, feeds, fetches, state): def get_serialized_ortmodule_memory_stat( self, memory_optimization_config_file_path: str, recompute_probe_level: str, return_opportunity_table: bool - ) -> Tuple[str, dict]: + ) -> tuple[str, dict]: """ Get serialized memory stats for OrtModule. 
""" diff --git a/orttraining/orttraining/python/training/ortmodule/_fallback.py b/orttraining/orttraining/python/training/ortmodule/_fallback.py index 56bb45d064d8a..24eae3c369efe 100644 --- a/orttraining/orttraining/python/training/ortmodule/_fallback.py +++ b/orttraining/orttraining/python/training/ortmodule/_fallback.py @@ -6,12 +6,10 @@ import os from enum import IntFlag from logging import Logger -from typing import Optional import torch from . import _logger, _utils -from ._fallback_exceptions import wrap_exception # noqa: F401 from ._fallback_exceptions import ( ORTModuleDeviceException, ORTModuleFallbackException, @@ -19,6 +17,7 @@ ORTModuleIOError, ORTModuleONNXModelException, ORTModuleTorchModelException, + wrap_exception, # noqa: F401 ) @@ -106,7 +105,7 @@ def __init__(self, pytorch_module: torch.nn.Module, policy: _FallbackPolicy, ret self._logger = logger def handle_exception( - self, exception: Exception, log_level: _logger.LogLevel, override_policy: Optional[_FallbackPolicy] = None + self, exception: Exception, log_level: _logger.LogLevel, override_policy: _FallbackPolicy | None = None ) -> None: """Process incoming `exception` based on the selected `policy` diff --git a/orttraining/orttraining/python/training/ortmodule/_gradient_accumulation_manager.py b/orttraining/orttraining/python/training/ortmodule/_gradient_accumulation_manager.py index db23c142f47f6..e0d7e5a19dfcc 100644 --- a/orttraining/orttraining/python/training/ortmodule/_gradient_accumulation_manager.py +++ b/orttraining/orttraining/python/training/ortmodule/_gradient_accumulation_manager.py @@ -37,8 +37,8 @@ def initialize(self, enabled, module, graph_info) -> None: # Since named_parameters() is a generator function, need to avoid overhead and # populate the params in memory to avoid generating the param map every # step. This will not work if the user adds or removes params between steps - self._param_name_value_map = {name: param for name, param in module.named_parameters()} - self._param_version_map = dict() + self._param_name_value_map = dict(module.named_parameters()) + self._param_version_map = {} self._frontier_node_arg_map = graph_info.frontier_node_arg_map self._cached_node_arg_names = graph_info.cached_node_arg_names self._cache_start = len(graph_info.user_output_names) diff --git a/orttraining/orttraining/python/training/ortmodule/_graph_execution_manager.py b/orttraining/orttraining/python/training/ortmodule/_graph_execution_manager.py index c1ff62a5faea7..25dfd9c3d43dd 100755 --- a/orttraining/orttraining/python/training/ortmodule/_graph_execution_manager.py +++ b/orttraining/orttraining/python/training/ortmodule/_graph_execution_manager.py @@ -7,7 +7,6 @@ import logging import os from abc import ABC, abstractmethod # noqa: F401 -from typing import Dict, List, Optional, Tuple import onnx import torch @@ -30,7 +29,7 @@ class _RunStateInfo: - def __init__(self, state, output_info: List[Tuple[torch.Size, torch.device, torch.dtype]]): + def __init__(self, state, output_info: list[tuple[torch.Size, torch.device, torch.dtype]]): """ :param state: State of partial run that contains intermediate tensors needed to resume the run later. :param output_info: Output info. @@ -74,7 +73,7 @@ def __init__( self._flattened_module = module self._onnx_models = _onnx_models.ONNXModels() - self._graph_transition_manager: Optional[GraphTransitionManager] = None + self._graph_transition_manager: GraphTransitionManager | None = None # Model after inference optimization and then gradient building. 
self._graph_builder = None @@ -341,7 +340,7 @@ def _device(self): return self._graph_transition_manager._device @_logger.TrackTime(_logger.ORTModuleInitPhase.DETECTION) - def _detect_from_inputs(self, inputs: Tuple, kwargs: Dict): + def _detect_from_inputs(self, inputs: tuple, kwargs: dict): """ Based on runtime inspection, enable conditional optimizations if applicable. @@ -381,7 +380,7 @@ def _detect_from_inputs(self, inputs: Tuple, kwargs: Dict): [f"{k}:{v:.0f}%" for k, v in self._runtime_inspector._embedding_module_to_padding_density_map.items()] ) - def _append_pull_weight_trigger_as_input(self, kwargs: Dict, device: torch.device): + def _append_pull_weight_trigger_as_input(self, kwargs: dict, device: torch.device): if self._runtime_options.enable_zero_stage3_support: from ._zero_stage3_compatibility import ( STAGE3_PULL_WEIGHT_TRIGGER_NAME, diff --git a/orttraining/orttraining/python/training/ortmodule/_graph_execution_manager_factory.py b/orttraining/orttraining/python/training/ortmodule/_graph_execution_manager_factory.py index 104cc0a894eed..237aafd6d2c3c 100644 --- a/orttraining/orttraining/python/training/ortmodule/_graph_execution_manager_factory.py +++ b/orttraining/orttraining/python/training/ortmodule/_graph_execution_manager_factory.py @@ -4,7 +4,6 @@ # -------------------------------------------------------------------------- from logging import Logger -from typing import Union from ._fallback import _FallbackManager from ._inference_manager import InferenceManager @@ -24,7 +23,7 @@ def __init__( self._training_manager = TrainingManager(module, debug_options, fallback_manager, logger) self._inference_manager = InferenceManager(module, debug_options, fallback_manager, logger) - def __call__(self, is_training) -> Union[InferenceManager, TrainingManager]: + def __call__(self, is_training) -> InferenceManager | TrainingManager: if is_training: return self._training_manager else: diff --git a/orttraining/orttraining/python/training/ortmodule/_graph_transition_manager.py b/orttraining/orttraining/python/training/ortmodule/_graph_transition_manager.py index d9cae8e1f99e8..5051e832e2480 100755 --- a/orttraining/orttraining/python/training/ortmodule/_graph_transition_manager.py +++ b/orttraining/orttraining/python/training/ortmodule/_graph_transition_manager.py @@ -11,9 +11,9 @@ import logging import os from collections import OrderedDict +from collections.abc import Mapping, Sequence from functools import partial from hashlib import md5 as hash_fn -from typing import Mapping, Sequence import onnx import torch @@ -213,8 +213,8 @@ def construct_inputs( # Create the buffers for the inputs that are either parameters or buffers in the original module. # For user inputs, fill with None for now, and will be filled dynamically during the forward run. - parameter_names = {k: v for k, v in self._flattened_module.named_parameters()} - buffer_names = {k: v for k, v in self._flattened_module.named_buffers()} + parameter_names = dict(self._flattened_module.named_parameters()) + buffer_names = dict(self._flattened_module.named_buffers()) for input_name in self.onnx_graph_input_names: if input_name in parameter_names: @@ -577,12 +577,12 @@ def _reprocess_check( # Model may have unused params dropped after export, so we only check those inputs existing in onnx graph. 
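The named_parameters() hunks go the opposite way from the comprehension edits earlier: when an iterable already yields (key, value) pairs, a dict comprehension that unpacks and re-pairs them is redundant, and dict() takes the iterable directly:

pairs = [("weight", 1), ("bias", 2)]  # stand-in for module.named_parameters()
assert {k: v for k, v in pairs} == dict(pairs) == {"weight": 1, "bias": 2}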
onnx_graph_input_requires_grads = [] - parameter_names = {k: v for k, v in flatten_module.named_parameters()} + parameter_names = dict(flatten_module.named_parameters()) for input_name in exported_model_info.onnx_graph_input_names: if input_name in exported_model_info.onnx_graph_input_names_user_defined: - assert ( - input_name in model_info_for_export.onnx_graph_input_data_accessor_user_defined - ), f"{input_name} model_info_for_export.onnx_graph_input_data_accessor_user_defined" + assert input_name in model_info_for_export.onnx_graph_input_data_accessor_user_defined, ( + f"{input_name} model_info_for_export.onnx_graph_input_data_accessor_user_defined" + ) # We assume the data accessor should be the same as the one used for the previous export, because # there is args and kwargs schema check during export check phase. if model_info_for_export.onnx_graph_input_data_accessor_user_defined[input_name]( @@ -736,7 +736,6 @@ def _export_model( runtime_inspector: RuntimeInspector, logger: logging.Logger, ) -> tuple[onnx.ModelProto, ORTModelInputOutputSchemaType, list[str], list[str]]: - # Add hooks to check the sparsity of the embedding and label inputs during the export. embedding_hook_handles = GraphTransitionManager._add_check_embedding_sparsity_hook( enable_embedding_sparse_optimizer, device, logger, runtime_inspector, flattened_module diff --git a/orttraining/orttraining/python/training/ortmodule/_inference_manager.py b/orttraining/orttraining/python/training/ortmodule/_inference_manager.py index 61db462ad3bb8..362f1a88ce924 100644 --- a/orttraining/orttraining/python/training/ortmodule/_inference_manager.py +++ b/orttraining/orttraining/python/training/ortmodule/_inference_manager.py @@ -4,7 +4,6 @@ # -------------------------------------------------------------------------- from logging import Logger -from typing import Tuple import onnx import torch @@ -35,7 +34,7 @@ def execution_session_run_forward( onnx_model: onnx.ModelProto, device: torch.device, *inputs, - ) -> Tuple[Tuple[torch.Tensor, ...], _RunStateInfo]: + ) -> tuple[tuple[torch.Tensor, ...], _RunStateInfo]: """Runs the forward pass on `execution_session` with given `onnx_model`, `device` and `inputs` Args: diff --git a/orttraining/orttraining/python/training/ortmodule/_io.py b/orttraining/orttraining/python/training/ortmodule/_io.py index 8ad3d0df3e4fa..f88390130b81f 100644 --- a/orttraining/orttraining/python/training/ortmodule/_io.py +++ b/orttraining/orttraining/python/training/ortmodule/_io.py @@ -7,9 +7,9 @@ import gc import inspect from collections import OrderedDict, abc +from collections.abc import Callable, Mapping, Sequence from functools import partial from logging import Logger -from typing import Callable, Dict, List, Mapping, Optional, Sequence, Tuple import torch @@ -78,7 +78,7 @@ def symbolic(g, self): def deepcopy_model_input( *args, **kwargs -) -> Tuple[Sequence[ORTModelInputOutputType], Mapping[str, ORTModelInputOutputType]]: +) -> tuple[Sequence[ORTModelInputOutputType], Mapping[str, ORTModelInputOutputType]]: def extract_tensor(value): if isinstance(value, torch.Tensor): if value.requires_grad: @@ -101,7 +101,7 @@ def extract_tensor(value): def _extract_schema( data: ORTModelInputOutputType, device -) -> Tuple[Sequence[ORTModelInputOutputType], ORTModelInputOutputSchemaType]: +) -> tuple[Sequence[ORTModelInputOutputType], ORTModelInputOutputSchemaType]: try: flatten_data, schema = extract_data_and_schema(data, constant_as_tensor=True, device=device) return flatten_data, schema @@ -119,15 +119,15 @@ def 
__init__(self, original_module: torch.nn.Module): # original module's forward function. # So we need set those information that are needed to unflatten the args and kwargs, before calling the # torch.export. - self._device: Optional[torch.device] = None - self._args_schema: Optional[ORTModelInputOutputSchemaType] = None - self._kwargs_schema: Optional[ORTModelInputOutputSchemaType] = None - self._num_positionals: Optional[int] = None + self._device: torch.device | None = None + self._args_schema: ORTModelInputOutputSchemaType | None = None + self._kwargs_schema: ORTModelInputOutputSchemaType | None = None + self._num_positionals: int | None = None # Similarly, to make torch.export happy, we need to flatten the original module's outputs into a 1-D list of tensors. # Need to keep the output schema to unflatten the outputs back to the original structure. # Then those code depends on the original structure of the outputs can work properly. - self._output_schema: Optional[ORTModelInputOutputSchemaType] = None + self._output_schema: ORTModelInputOutputSchemaType | None = None def forward(self, *args): new_args = unflatten_data_using_schema(args[: self._num_positionals], self._args_schema) @@ -150,17 +150,17 @@ def forward(self, *args): class ModelInfoForExport: def __init__( self, - onnx_graph_input_names: List[str], - onnx_graph_input_names_require_grad: List[str], - onnx_graph_input_dynamic_axes_map: Dict[str, Dict[int, str]], - onnx_graph_input_shapes: List[List[int]], - onnx_graph_input_data_accessor_user_defined: Optional[Dict[str, callable]] = None, - onnx_graph_input_const_as_tensor: Optional[Dict[str, torch.device]] = None, - onnx_graph_input_arg_schema: Optional[Dict[str, ORTModelInputOutputSchemaType]] = None, - onnx_graph_input_kwarg_schema: Optional[Dict[str, ORTModelInputOutputSchemaType]] = None, + onnx_graph_input_names: list[str], + onnx_graph_input_names_require_grad: list[str], + onnx_graph_input_dynamic_axes_map: dict[str, dict[int, str]], + onnx_graph_input_shapes: list[list[int]], + onnx_graph_input_data_accessor_user_defined: dict[str, callable] | None = None, + onnx_graph_input_const_as_tensor: dict[str, torch.device] | None = None, + onnx_graph_input_arg_schema: dict[str, ORTModelInputOutputSchemaType] | None = None, + onnx_graph_input_kwarg_schema: dict[str, ORTModelInputOutputSchemaType] | None = None, num_positional_args: int = 0, - export_mode: Optional[int] = None, - export_extra_kwargs: Optional[Dict[str, any]] = None, + export_mode: int | None = None, + export_extra_kwargs: dict[str, any] | None = None, ): # Value can be either torch.onnx.TrainingMode.TRAINING or torch.onnx.TrainingMode.EVAL self.export_mode = export_mode @@ -172,41 +172,41 @@ def __init__( # Input names parsed and then flatten from the model's forward function signature. # This should contains ONLY the user defined input names # Be noted: some of the input might not be used by the model for its compute. - self.onnx_graph_input_names: List[str] = onnx_graph_input_names + self.onnx_graph_input_names: list[str] = onnx_graph_input_names # A subset of onnx_graph_input_names. # Input names that require gradient parsed and then flatten from the model's forward function signature # This should contains ONLY the user defined input names # Be noted: some of the input might not be used by the model for its compute. 
- self.onnx_graph_input_names_require_grad: List[str] = onnx_graph_input_names_require_grad + self.onnx_graph_input_names_require_grad: list[str] = onnx_graph_input_names_require_grad # Create symbolic names for each dimension of the graph input (e.g. onnx_graph_input_names). # The key is the input name, the value is a dict of {dim_index: symbolic_dim_name} # e.g. {"input1": {0: "input1_dim0", 1: "input1_dim1"}, "input2": {0: "input2_dim0"}} - self.onnx_graph_input_dynamic_axes_map: Dict[str, Dict[int, str]] = onnx_graph_input_dynamic_axes_map + self.onnx_graph_input_dynamic_axes_map: dict[str, dict[int, str]] = onnx_graph_input_dynamic_axes_map - self.onnx_graph_input_shapes: List[List[int]] = onnx_graph_input_shapes + self.onnx_graph_input_shapes: list[list[int]] = onnx_graph_input_shapes # The input args schema for the original model's forward function. # Only contains the schema for those inputs used by the model for its compute (e.g. as the inputs # of the export model). - self.onnx_graph_input_arg_schema: Dict[str, ORTModelInputOutputSchemaType] = onnx_graph_input_arg_schema + self.onnx_graph_input_arg_schema: dict[str, ORTModelInputOutputSchemaType] = onnx_graph_input_arg_schema # The input kwargs schema for the original model's forward function. # Only contains the schema for those inputs used by the model for its compute (e.g. as the inputs # of the export model). - self.onnx_graph_input_kwarg_schema: Dict[str, ORTModelInputOutputSchemaType] = onnx_graph_input_kwarg_schema + self.onnx_graph_input_kwarg_schema: dict[str, ORTModelInputOutputSchemaType] = onnx_graph_input_kwarg_schema self.num_positional_args: int = num_positional_args # A function to access the input data from the args and kwargs. # If it is not None, the length is same as onnx_graph_input_names. # For i-th input name, we can use the i-th function to get the input data from args and kwargs. - self.onnx_graph_input_data_accessor_user_defined: Optional[Dict[str, callable]] = ( + self.onnx_graph_input_data_accessor_user_defined: dict[str, callable] | None = ( onnx_graph_input_data_accessor_user_defined ) - self.onnx_graph_input_const_as_tensor: Optional[Dict[str, torch.device]] = onnx_graph_input_const_as_tensor + self.onnx_graph_input_const_as_tensor: dict[str, torch.device] | None = onnx_graph_input_const_as_tensor def __str__(self) -> str: return f"""ModelInfoForExport class: @@ -237,14 +237,14 @@ class SkipRetValue: def parse_inputs_for_onnx_export( - all_input_parameters: List[inspect.Parameter], + all_input_parameters: list[inspect.Parameter], args: Sequence[ORTModelInputOutputType], kwargs: Mapping[str, ORTModelInputOutputType], constant_as_tensor: bool, device: torch.device, export_mode: int, logger: Logger, - export_extra_kwargs: Optional[Dict[str, any]] = None, + export_extra_kwargs: dict[str, any] | None = None, ) -> ModelInfoForExport: """Parses through the model inputs and returns _InputInfo. 
@@ -275,7 +275,7 @@ def parse_inputs_for_onnx_export( arg_tensor_idx = [-1] kwarg_tensor_idx = [-1] - def _add_dynamic_shape(name, input) -> Dict[str, Dict[int, str]]: + def _add_dynamic_shape(name, input) -> dict[str, dict[int, str]]: dynamic_axes[name] = {} for dim_idx in range(len(input.shape)): dynamic_axes[name].update({dim_idx: f"{name}_dim{dim_idx}"}) @@ -285,7 +285,7 @@ def _warn_of_constant_inputs(data): logger.info(f"Received input of type {type(data)} is treated as a constant by ORT by default.") def _add_input( - name: str, input_value, onnx_graph_input_names: List[str], cur_func: Callable, tensor_idx: List[int] + name: str, input_value, onnx_graph_input_names: list[str], cur_func: Callable, tensor_idx: list[int] ): """Returns number of expanded non none inputs that _add_input processed""" @@ -396,16 +396,16 @@ def _access_func(key, cur_func, args, kwargs): raise ORTModuleIOError(f"ORTModule does not support input type {type(value)} for input {name}") - visited_input_names: List[str] = [] + visited_input_names: list[str] = [] - onnx_graph_input_names: List[str] = [] - dynamic_axes: Dict[str, Dict[int, str]] = {} - input_names_require_grad: List[str] = [] - input_shape: List[List[int]] = [] + onnx_graph_input_names: list[str] = [] + dynamic_axes: dict[str, dict[int, str]] = {} + input_names_require_grad: list[str] = [] + input_shape: list[list[int]] = [] input_arg_schema: ORTModelInputOutputSchemaType = [] input_kwarg_schema: ORTModelInputOutputSchemaType = OrderedDict() - data_accessors: Dict[str, Callable] = OrderedDict() - const_to_tensor_inputs: Dict[str, torch.device] = OrderedDict() + data_accessors: dict[str, Callable] = OrderedDict() + const_to_tensor_inputs: dict[str, torch.device] = OrderedDict() num_positional_args: int = 0 var_positional_idx = 0 @@ -511,7 +511,7 @@ def calculate_total_parameter_size_in_bytes(module: torch.nn.Module) -> int: return total_size -def can_module_be_deep_cloned(module: torch.nn.Module, device: Optional[torch.device]) -> bool: +def can_module_be_deep_cloned(module: torch.nn.Module, device: torch.device | None) -> bool: """Check if the module can be cloned If the 2 times total module parameter size >= device memory, the module cannot be cloned. 
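The `can_module_be_deep_cloned` docstring states the rule compactly: cloning is refused when twice the total parameter size would meet or exceed device memory, since the clone briefly doubles the footprint. A minimal sketch of that check under stated assumptions; the helper name, the use of *free* CUDA memory, and the CPU short-circuit are illustrative guesses, not the patch's actual implementation:

    import torch

    def can_deep_clone(module: torch.nn.Module, device: torch.device | None) -> bool:
        # Total parameter size in bytes, in the spirit of
        # calculate_total_parameter_size_in_bytes().
        total_bytes = sum(p.numel() * p.element_size() for p in module.parameters())
        if device is None or device.type != "cuda":
            return True  # assumption: only CUDA memory is budget-checked
        free_bytes, _ = torch.cuda.mem_get_info(device)
        return 2 * total_bytes < free_bytes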
@@ -568,8 +568,8 @@ def parse_outputs_for_onnx_export_and_extract_schema( sample_outputs = model_copy(*sample_args_copy, **sample_kwargs_copy) # Parse the output and extract the output_names and output_dynamic_axes to be used for onnx export - output_names: List[str] = [] - output_dynamic_axes: Dict[str, Dict[int, str]] = {} + output_names: list[str] = [] + output_dynamic_axes: dict[str, dict[int, str]] = {} for output_idx, output in enumerate(sample_outputs): output_name = f"output-{output_idx}" output_names.append(output_name) diff --git a/orttraining/orttraining/python/training/ortmodule/_logger.py b/orttraining/orttraining/python/training/ortmodule/_logger.py index 4d54e8e59fb50..00acae9061495 100644 --- a/orttraining/orttraining/python/training/ortmodule/_logger.py +++ b/orttraining/orttraining/python/training/ortmodule/_logger.py @@ -9,10 +9,10 @@ import tempfile import textwrap import time +from collections.abc import Callable from contextlib import contextmanager from enum import IntEnum from functools import partial -from typing import Callable, Dict, List, Optional from onnxruntime.capi._pybind_state import Severity @@ -28,7 +28,7 @@ class LogLevel(IntEnum): FATAL = 5 -ORTMODULE_LOG_LEVEL_MAP: Dict[LogLevel, List[int]] = { +ORTMODULE_LOG_LEVEL_MAP: dict[LogLevel, list[int]] = { LogLevel.VERBOSE: [Severity.VERBOSE, logging.DEBUG], LogLevel.DEVINFO: [Severity.INFO, logging.INFO], # ONNX Runtime has too many INFO logs, so we map it to WARNING for a better user experience. @@ -107,8 +107,8 @@ class TimeTracker: def __init__( self, ): - self.starts_: List[float] = [TimeTracker.NOT_RECORD] * len(ORTModuleInitPhase) - self.ends_: List[float] = [TimeTracker.NOT_RECORD] * len(ORTModuleInitPhase) + self.starts_: list[float] = [TimeTracker.NOT_RECORD] * len(ORTModuleInitPhase) + self.ends_: list[float] = [TimeTracker.NOT_RECORD] * len(ORTModuleInitPhase) def start(self, phase: ORTModuleInitPhase): self.starts_[phase] = time.time() @@ -184,7 +184,7 @@ def wrapper(*args, **kwargs): @contextmanager -def _suppress_os_stream_output(enable=True, on_exit: Optional[Callable] = None): +def _suppress_os_stream_output(enable=True, on_exit: Callable | None = None): """Suppress output from being printed to stdout and stderr. If on_exit is not None, it will be called when the context manager exits. @@ -224,7 +224,7 @@ def _suppress_os_stream_output(enable=True, on_exit: Optional[Callable] = None): yield -def _log_with_filter(logger: logging.Logger, record_filters: Optional[List[str]], name: Optional[str], fo): +def _log_with_filter(logger: logging.Logger, record_filters: list[str] | None, name: str | None, fo): """Log the content by filtering with list of string patterns. Args: logger: The logger to log the content. diff --git a/orttraining/orttraining/python/training/ortmodule/_onnx_models.py b/orttraining/orttraining/python/training/ortmodule/_onnx_models.py index 4b6011f0786ec..3f9262bc010c2 100644 --- a/orttraining/orttraining/python/training/ortmodule/_onnx_models.py +++ b/orttraining/orttraining/python/training/ortmodule/_onnx_models.py @@ -4,7 +4,6 @@ import os from dataclasses import dataclass -from typing import Optional import onnx import torch @@ -31,7 +30,7 @@ class ONNXModels: It has further optimizations done by the InferenceSession and is saved by the InferenceSession. 
""" - optimized_model: Optional[onnx.ModelProto] = None + optimized_model: onnx.ModelProto | None = None def save_optimized_model(self, path, name_prefix, export_mode): # save the ortmodule optimized model diff --git a/orttraining/orttraining/python/training/ortmodule/_runtime_inspector.py b/orttraining/orttraining/python/training/ortmodule/_runtime_inspector.py index 773c506d28ef4..6026ecb861efa 100644 --- a/orttraining/orttraining/python/training/ortmodule/_runtime_inspector.py +++ b/orttraining/orttraining/python/training/ortmodule/_runtime_inspector.py @@ -7,7 +7,6 @@ import tempfile from enum import IntEnum from logging import Logger -from typing import Dict, List, Optional, Tuple, Union import onnx import torch @@ -66,8 +65,8 @@ class MemoryOptimizationSummary: def __init__(self, saving_str="", simplified_saving_expr=None, evaluated_saving=None, freq=0): self.raw_symbolic_saving_str = saving_str - self.simplified_symbolic_saving_expr: Optional[Symbol] = simplified_saving_expr - self.evaluated_saving: Union[str, int, None] = evaluated_saving + self.simplified_symbolic_saving_expr: Symbol | None = simplified_saving_expr + self.evaluated_saving: str | int | None = evaluated_saving self.freq = freq @@ -93,9 +92,9 @@ def __init__(self, m: torch.nn.Module, logger: Logger, training: bool): self._is_enabled = True # Memory optimization related. - self.cluster_id_combination_to_saving_symbolics_map: Dict[str, MemoryOptimizationSummary] = {} + self.cluster_id_combination_to_saving_symbolics_map: dict[str, MemoryOptimizationSummary] = {} ## The value is a list of symbolic dim values parsed from the first batch. - self.symbolic_dim_name_to_value_map: Dict = {} + self.symbolic_dim_name_to_value_map: dict = {} ## Used to control only the first batch is used to collect symbolic dim values. 
self.symbolic_dim_collecting_completed = False @@ -132,8 +131,8 @@ def enable_memory_stats_by_step(self, print_memory_stats_by_step: bool): def collect_symbolic_dim_values( self, - onnx_input_name_to_dynamic_axes_map: Dict[str, Dict[int, str]], - onnx_input_to_value_map: Dict[str, torch.Tensor], + onnx_input_name_to_dynamic_axes_map: dict[str, dict[int, str]], + onnx_input_to_value_map: dict[str, torch.Tensor], ): """Collect symbolic dim values.""" for input_name, dynamic_axes in onnx_input_name_to_dynamic_axes_map.items(): @@ -169,7 +168,7 @@ def find_memory_optimization_opportunity(self, execution_agent: TrainingAgent, r memory_optimizer_config_file_path, recompute_probe_config, False ) - cluster_id_to_saving_symbol_map: Dict[str, MemoryOptimizationSummary] = {} + cluster_id_to_saving_symbol_map: dict[str, MemoryOptimizationSummary] = {} for cluster_id, memory_saving_stat in memory_optimization_saving_symbolics.items(): memory_saving_symbolic = memory_saving_stat[0] freq = memory_saving_stat[1] @@ -201,7 +200,6 @@ def find_memory_optimization_opportunity(self, execution_agent: TrainingAgent, r _MemoryOptimizationLevel.TRANSFORMER_LAYERWISE_RECOMPUTE, _MemoryOptimizationLevel.TRANSFORMER_LAYERWISE_RECOMPUTE_WITH_COMPROMISE, ]: - apply_config = [] for cluster_id in self.cluster_id_combination_to_saving_symbolics_map: @@ -229,7 +227,7 @@ def find_memory_optimization_opportunity(self, execution_agent: TrainingAgent, r apply_config.append(",".join(recompute_configs)) - self._json_file_for_layerwise_recompute = tempfile.NamedTemporaryFile(mode="w+") + self._json_file_for_layerwise_recompute = tempfile.NamedTemporaryFile(mode="w+") # noqa: SIM115 json.dump(apply_config, self._json_file_for_layerwise_recompute) self._json_file_for_layerwise_recompute.flush() runtime_options.memory_optimizer_config_file_path = self._json_file_for_layerwise_recompute.name @@ -283,7 +281,7 @@ def _increase_step(self): def display_memory_optimization_plans( self, memory_optimizer_config_file_path, details=False - ) -> Tuple[List[str], PTable]: + ) -> tuple[list[str], PTable]: mem_plan_count = len(self.cluster_id_combination_to_saving_symbolics_map) if mem_plan_count > 0: @@ -387,9 +385,9 @@ def backward(ctx, grad_output: torch.Tensor): @staticmethod def infer_shape( node: onnx.NodeProto, - tensor_input_shapes: List[Optional[List[Union[int, str]]]], - tensor_input_dtypes: List[torch.onnx.TensorProtoDataType], - ) -> Tuple[List[Optional[List[Union[int, str]]]], List[torch.onnx.TensorProtoDataType]]: + tensor_input_shapes: list[list[int | str] | None], + tensor_input_dtypes: list[torch.onnx.TensorProtoDataType], + ) -> tuple[list[list[int | str] | None], list[torch.onnx.TensorProtoDataType]]: return tensor_input_shapes, tensor_input_dtypes @staticmethod diff --git a/orttraining/orttraining/python/training/ortmodule/_torch_module_interface.py b/orttraining/orttraining/python/training/ortmodule/_torch_module_interface.py index 897bf89c15063..2ae3c98137cbd 100644 --- a/orttraining/orttraining/python/training/ortmodule/_torch_module_interface.py +++ b/orttraining/orttraining/python/training/ortmodule/_torch_module_interface.py @@ -3,7 +3,8 @@ # _torch_module_interface.py from collections import OrderedDict -from typing import Callable, Iterator, Optional, Tuple, TypeVar +from collections.abc import Callable, Iterator +from typing import Optional, TypeVar import torch @@ -58,10 +59,10 @@ def state_dict(self, destination=None, prefix="", keep_vars=False): def load_state_dict(self, state_dict: "OrderedDict[str, 
torch.Tensor]", strict: bool = True): raise NotImplementedError(f"load_state_dict is not implemented for {type(self)}.") - def register_buffer(self, name: str, tensor: Optional[torch.Tensor], persistent: bool = True) -> None: + def register_buffer(self, name: str, tensor: torch.Tensor | None, persistent: bool = True) -> None: raise NotImplementedError(f"register_buffer is not implemented for {type(self)}.") - def register_parameter(self, name: str, param: Optional[torch.nn.Parameter]) -> None: + def register_parameter(self, name: str, param: torch.nn.Parameter | None) -> None: raise NotImplementedError(f"register_parameter is not implemented for {type(self)}.") def get_parameter(self, target: str) -> torch.nn.Parameter: @@ -73,13 +74,13 @@ def get_buffer(self, target: str) -> torch.Tensor: def parameters(self, recurse: bool = True) -> Iterator[torch.nn.Parameter]: raise NotImplementedError(f"parameters is not implemented for {type(self)}.") - def named_parameters(self, prefix: str = "", recurse: bool = True) -> Iterator[Tuple[str, torch.nn.Parameter]]: + def named_parameters(self, prefix: str = "", recurse: bool = True) -> Iterator[tuple[str, torch.nn.Parameter]]: raise NotImplementedError(f"named_parameters is not implemented for {type(self)}.") def buffers(self, recurse: bool = True) -> Iterator[torch.Tensor]: raise NotImplementedError(f"buffers is not implemented for {type(self)}.") - def named_buffers(self, prefix: str = "", recurse: bool = True) -> Iterator[Tuple[str, torch.Tensor]]: + def named_buffers(self, prefix: str = "", recurse: bool = True) -> Iterator[tuple[str, torch.Tensor]]: raise NotImplementedError(f"named_buffers is not implemented for {type(self)}.") def _load_from_state_dict( @@ -87,7 +88,7 @@ def _load_from_state_dict( ): raise NotImplementedError(f"_load_from_state_dict is not implemented for {type(self)}.") - def named_children(self) -> Iterator[Tuple[str, T]]: + def named_children(self) -> Iterator[tuple[str, T]]: raise NotImplementedError(f"named_children is not implemented for {type(self)}.") def modules(self) -> Iterator[T]: diff --git a/orttraining/orttraining/python/training/ortmodule/_torch_module_ort.py b/orttraining/orttraining/python/training/ortmodule/_torch_module_ort.py index 125590902294d..2ed346fe0bfa6 100644 --- a/orttraining/orttraining/python/training/ortmodule/_torch_module_ort.py +++ b/orttraining/orttraining/python/training/ortmodule/_torch_module_ort.py @@ -3,8 +3,9 @@ # _torch_module_ort.py from collections import OrderedDict +from collections.abc import Callable, Iterator from logging import Logger -from typing import Callable, Iterator, Optional, Tuple, TypeVar +from typing import Optional, TypeVar import torch @@ -75,12 +76,12 @@ def load_state_dict(self, state_dict: "OrderedDict[str, torch.Tensor]", strict: # key names does not need to contain the _module.flattened_module._original_module prefix return self._original_module.load_state_dict(state_dict, strict=strict) - def register_buffer(self, name: str, tensor: Optional[torch.Tensor], persistent: bool = True) -> None: + def register_buffer(self, name: str, tensor: torch.Tensor | None, persistent: bool = True) -> None: """Override original method to delegate execution to the original PyTorch user module""" self._original_module.register_buffer(name, tensor, persistent=persistent) - def register_parameter(self, name: str, param: Optional[torch.nn.Parameter]) -> None: + def register_parameter(self, name: str, param: torch.nn.Parameter | None) -> None: """Override original method to delegate 
execution to the original PyTorch user module""" self._original_module.register_parameter(name, param) @@ -100,7 +101,7 @@ def parameters(self, recurse: bool = True) -> Iterator[torch.nn.Parameter]: yield from self._original_module.parameters(recurse=recurse) - def named_parameters(self, prefix: str = "", recurse: bool = True) -> Iterator[Tuple[str, torch.nn.Parameter]]: + def named_parameters(self, prefix: str = "", recurse: bool = True) -> Iterator[tuple[str, torch.nn.Parameter]]: """Override original method to delegate execution to the original PyTorch user module""" yield from self._original_module.named_parameters(prefix=prefix, recurse=recurse) @@ -110,7 +111,7 @@ def buffers(self, recurse: bool = True) -> Iterator[torch.Tensor]: yield from self._original_module.buffers(recurse=recurse) - def named_buffers(self, prefix: str = "", recurse: bool = True) -> Iterator[Tuple[str, torch.Tensor]]: + def named_buffers(self, prefix: str = "", recurse: bool = True) -> Iterator[tuple[str, torch.Tensor]]: """Override original method to delegate execution to the original PyTorch user module""" yield from self._original_module.named_buffers(prefix=prefix, recurse=recurse) @@ -129,7 +130,7 @@ def _load_from_state_dict( state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs ) - def named_children(self) -> Iterator[Tuple[str, T]]: + def named_children(self) -> Iterator[tuple[str, T]]: """Override original method to delegate execution to the original PyTorch user module""" yield from self._original_module.named_children() diff --git a/orttraining/orttraining/python/training/ortmodule/_torch_module_pytorch.py b/orttraining/orttraining/python/training/ortmodule/_torch_module_pytorch.py index 9f7fb1d0dcd16..2c38e98cc8657 100644 --- a/orttraining/orttraining/python/training/ortmodule/_torch_module_pytorch.py +++ b/orttraining/orttraining/python/training/ortmodule/_torch_module_pytorch.py @@ -3,7 +3,8 @@ # _torch_module_pytorch.py from collections import OrderedDict -from typing import Callable, Iterator, Optional, Tuple, TypeVar +from collections.abc import Callable, Iterator +from typing import Optional, TypeVar import torch @@ -38,10 +39,10 @@ def state_dict(self, destination=None, prefix="", keep_vars=False): def load_state_dict(self, state_dict: "OrderedDict[str, torch.Tensor]", strict: bool = True): return self._original_module.load_state_dict(state_dict, strict=strict) - def register_buffer(self, name: str, tensor: Optional[torch.Tensor], persistent: bool = True) -> None: + def register_buffer(self, name: str, tensor: torch.Tensor | None, persistent: bool = True) -> None: self._original_module.register_buffer(name, tensor, persistent=persistent) - def register_parameter(self, name: str, param: Optional[torch.nn.Parameter]) -> None: + def register_parameter(self, name: str, param: torch.nn.Parameter | None) -> None: self._original_module.register_parameter(name, param) def get_parameter(self, target: str) -> torch.nn.Parameter: @@ -53,13 +54,13 @@ def get_buffer(self, target: str) -> torch.Tensor: def parameters(self, recurse: bool = True) -> Iterator[torch.nn.Parameter]: yield from self._original_module.parameters(recurse=recurse) - def named_parameters(self, prefix: str = "", recurse: bool = True) -> Iterator[Tuple[str, torch.nn.Parameter]]: + def named_parameters(self, prefix: str = "", recurse: bool = True) -> Iterator[tuple[str, torch.nn.Parameter]]: yield from self._original_module.named_parameters(prefix=prefix, recurse=recurse) def buffers(self, recurse: bool = 
True) -> Iterator[torch.Tensor]: yield from self._original_module.buffers(recurse=recurse) - def named_buffers(self, prefix: str = "", recurse: bool = True) -> Iterator[Tuple[str, torch.Tensor]]: + def named_buffers(self, prefix: str = "", recurse: bool = True) -> Iterator[tuple[str, torch.Tensor]]: yield from self._original_module.named_buffers(prefix=prefix, recurse=recurse) def _load_from_state_dict( @@ -69,7 +70,7 @@ def _load_from_state_dict( state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs ) - def named_children(self) -> Iterator[Tuple[str, T]]: + def named_children(self) -> Iterator[tuple[str, T]]: yield from self._original_module.named_children() def modules(self) -> Iterator[T]: diff --git a/orttraining/orttraining/python/training/ortmodule/_training_manager.py b/orttraining/orttraining/python/training/ortmodule/_training_manager.py index d5d5ce672224c..b4303587e69e6 100644 --- a/orttraining/orttraining/python/training/ortmodule/_training_manager.py +++ b/orttraining/orttraining/python/training/ortmodule/_training_manager.py @@ -4,7 +4,6 @@ # -------------------------------------------------------------------------- from logging import Logger -from typing import Tuple import onnx import torch @@ -48,7 +47,7 @@ def execution_session_run_forward( device: torch.device, gradient_accumulation_manager: GradientAccumulationManager, *inputs, - ) -> Tuple[Tuple[torch.Tensor, ...], _RunStateInfo]: + ) -> tuple[tuple[torch.Tensor, ...], _RunStateInfo]: """Runs the forward pass on `execution_session` with given `onnx_model`, `device` and `inputs` Args: @@ -85,7 +84,7 @@ def execution_session_run_forward( # Run and return module outputs. execution_session.run_forward(forward_inputs, forward_outputs, state, gradient_accumulation_manager.cache) - user_outputs: Tuple[torch.Tensor, ...] = gradient_accumulation_manager.extract_outputs_and_maybe_update_cache( + user_outputs: tuple[torch.Tensor, ...] 
= gradient_accumulation_manager.extract_outputs_and_maybe_update_cache( forward_outputs, device ) diff --git a/orttraining/orttraining/python/training/ortmodule/_utils.py b/orttraining/orttraining/python/training/ortmodule/_utils.py index 4787cb31a24fd..8b8de848473d7 100644 --- a/orttraining/orttraining/python/training/ortmodule/_utils.py +++ b/orttraining/orttraining/python/training/ortmodule/_utils.py @@ -13,7 +13,7 @@ import random import traceback import types -from typing import Callable, List, Optional, Tuple, Union +from collections.abc import Callable import numpy as np import torch @@ -63,10 +63,10 @@ def _ortvalue_from_torch_tensor(torch_tensor: torch.Tensor) -> C.OrtValue: def _ortvalues_to_torch_tensor( - ortvalues: C.OrtValueVector, device: Optional[torch.device] = None -) -> Tuple[torch.Tensor, ...]: + ortvalues: C.OrtValueVector, device: torch.device | None = None +) -> tuple[torch.Tensor, ...]: if len(ortvalues) == 0: - return tuple() + return () if device is not None and device.type == "ort": if not hasattr(C, "to_aten_ort_device_tensor"): @@ -76,7 +76,7 @@ def _ortvalues_to_torch_tensor( if not isinstance(ortvalues, C.OrtValueVector): raise TypeError(f"ortvalues must be an instance of OrtValueVector not {type(ortvalues)!r}.") - res: List[torch.Tensor] = ortvalues.to_dlpacks(_from_dlpack) + res: list[torch.Tensor] = ortvalues.to_dlpacks(_from_dlpack) bool_indices = ortvalues.bool_tensor_indices() if len(bool_indices): # DLPack structure does not know for sure if it stores boolean @@ -127,7 +127,7 @@ def _check_same_device(device: torch.device, argument_str: str, *args): ) -def get_device_index(device: Union[str, int, torch.device]) -> int: +def get_device_index(device: str | int | torch.device) -> int: if isinstance(device, str): # could be 'cuda:0', 'cuda:1', or 'cpu'. with cpu, set index=0 device = torch.device(device) @@ -136,7 +136,7 @@ def get_device_index(device: Union[str, int, torch.device]) -> int: return 0 if device.index is None else device.index -def get_device_str(device: Union[str, int, torch.device]) -> str: +def get_device_str(device: str | int | torch.device) -> str: if isinstance(device, str): # could be 'cuda:0', 'cuda:1', or 'cpu'. 
with cpu, set index=0 if device.find(":") == -1: @@ -161,7 +161,7 @@ def get_device_from_module_and_inputs(module, inputs, kwargs): return device -def _get_device_from_module(module) -> Optional[torch.device]: +def _get_device_from_module(module) -> torch.device | None: """Returns the first device found in the `module`'s parameters or None Args: @@ -187,7 +187,7 @@ def _get_device_from_module(module) -> Optional[torch.device]: return device -def _get_device_from_inputs(args, kwargs) -> Optional[torch.device]: +def _get_device_from_inputs(args, kwargs) -> torch.device | None: """Returns device from first PyTorch Tensor within args or kwargs Args: diff --git a/orttraining/orttraining/python/training/ortmodule/_zero_stage3_compatibility.py b/orttraining/orttraining/python/training/ortmodule/_zero_stage3_compatibility.py index 11d978e71d8a8..742d5bb8b5263 100644 --- a/orttraining/orttraining/python/training/ortmodule/_zero_stage3_compatibility.py +++ b/orttraining/orttraining/python/training/ortmodule/_zero_stage3_compatibility.py @@ -5,7 +5,6 @@ from contextlib import contextmanager -from typing import Dict, List, Optional, Tuple, Union import torch from onnx import ModelProto, NodeProto, TensorProto, ValueInfoProto, helper @@ -31,8 +30,8 @@ def post_processing_enable_zero_stage3_compat( exported_model: ModelProto, - zero_stage3_named_params: Dict[str, torch.nn.parameter.Parameter], - all_param_names: List[str], + zero_stage3_named_params: dict[str, torch.nn.parameter.Parameter], + all_param_names: list[str], ) -> ModelProto: """This function is used to enable zero stage3 compatibility. @@ -62,7 +61,7 @@ def post_processing_enable_zero_stage3_compat( def _get_param_pull_trigger_name(param_name: str) -> str: return f"pull_{param_name}" - def _get_func_name(node: NodeProto) -> Optional[str]: + def _get_func_name(node: NodeProto) -> str | None: for attr in node.attribute: if attr.name == "func_name": return attr.s.decode("utf-8") if isinstance(attr.s, bytes) else attr.s @@ -102,9 +101,9 @@ def _get_func_name(node: NodeProto) -> Optional[str]: func_name = _get_func_name(c) if func_name == pre_forward_function_name: - assert ( - pre_forward_pythonop_node is None - ), "Multiple ORTZeROOffloadPreForwardFunction nodes found, it should not happen" + assert pre_forward_pythonop_node is None, ( + "Multiple ORTZeROOffloadPreForwardFunction nodes found, it should not happen" + ) pre_forward_pythonop_node = c if pre_forward_pythonop_node is None: @@ -141,7 +140,7 @@ def _get_func_name(node: NodeProto) -> Optional[str]: # Update consumer's input to use the full-sized parameter output of ORTZeROOffloadPreForwardFunction. 
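The assert in the hunk above, like the one in `_graph_transition_manager.py` earlier and the batch in `_load_config_from_json.py` below, is re-wrapped so the condition sits on one line and the *message* takes the parentheses. That is the ruff formatter's preferred layout, and it also steers clear of a classic pitfall: parenthesizing the condition together with the message builds a 2-tuple, which is always truthy, so the assert can never fail. For example:

    threshold = 0.5

    # Safe: only the message is parenthesized (what this patch does).
    assert threshold <= 1.0, (
        "threshold must be a fraction in [0, 1]"
    )

    # Bug: wrapping condition AND message in one pair of parentheses creates a
    # non-empty tuple, which is truthy -- this assert could never fire:
    # assert (threshold <= 1.0, "threshold must be a fraction in [0, 1]")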
for c in consumers: - new_inputs = [c_input for c_input in c.input] + new_inputs = list(c.input) for c_input_index in range(len(c.input)): if c.input[c_input_index] == graph_input.name: new_inputs[c_input_index] = ready_weight_name @@ -210,7 +209,7 @@ def _get_func_name(node: NodeProto) -> Optional[str]: def _create_weight_retrieval_function( - zero_stage3_named_params: Optional[Dict[str, torch.nn.parameter.Parameter]] + zero_stage3_named_params: dict[str, torch.nn.parameter.Parameter] | None, ) -> str: """This function is used to create a weight retrieving function using zero_stage3_named_params.""" @@ -231,9 +230,9 @@ def backward(ctx, *grad_outputs): @staticmethod def infer_shape( node: NodeProto, - tensor_input_shapes: List[Optional[List[Union[int, str]]]], - tensor_input_dtypes: List[torch.onnx.TensorProtoDataType], - ) -> Tuple[List[Optional[List[Union[int, str]]]], List[torch.onnx.TensorProtoDataType]]: + tensor_input_shapes: list[list[int | str] | None], + tensor_input_dtypes: list[torch.onnx.TensorProtoDataType], + ) -> tuple[list[list[int | str] | None], list[torch.onnx.TensorProtoDataType]]: param_count = len(zero_stage3_named_params.values()) tensor_output_shapes = [ tensor_input_shapes[0], @@ -258,9 +257,9 @@ def _register_symbolic_shape_infer_functions(): def _simple_pass_through_infer_shape( node: NodeProto, - tensor_input_shapes: List[Optional[List[Union[int, str]]]], - tensor_input_dtypes: List[torch.onnx.TensorProtoDataType], - ) -> Tuple[List[Optional[List[Union[int, str]]]], List[torch.onnx.TensorProtoDataType]]: + tensor_input_shapes: list[list[int | str] | None], + tensor_input_dtypes: list[torch.onnx.TensorProtoDataType], + ) -> tuple[list[list[int | str] | None], list[torch.onnx.TensorProtoDataType]]: return tensor_input_shapes, tensor_input_dtypes register_shape_inference_function(DEEPSPEED_PRE_BACKWARD_FUNCTION_NAME, _simple_pass_through_infer_shape) @@ -268,9 +267,9 @@ def _simple_pass_through_infer_shape( def _linear_infer_shape( node: NodeProto, - tensor_input_shapes: List[Optional[List[Union[int, str]]]], - tensor_input_dtypes: List[torch.onnx.TensorProtoDataType], - ) -> Tuple[List[Optional[List[Union[int, str]]]], List[torch.onnx.TensorProtoDataType]]: + tensor_input_shapes: list[list[int | str] | None], + tensor_input_dtypes: list[torch.onnx.TensorProtoDataType], + ) -> tuple[list[list[int | str] | None], list[torch.onnx.TensorProtoDataType]]: # output = input.matmul(weight.t()) tensor_input_shapes[0] # input shape2 = tensor_input_shapes[1] # weight @@ -311,13 +310,13 @@ def _alias_input(node_proto_str: str): def _create_weight_retrieval_pythonop( - zero_stage3_named_params: Optional[Dict[str, torch.nn.parameter.Parameter]], + zero_stage3_named_params: dict[str, torch.nn.parameter.Parameter] | None, func_full_qual_name: str, input_name: str, - output_names: List[str], + output_names: list[str], pull_weight_trigger_output_dtype: int, - pull_weight_trigger_output_shape: List[int], -) -> Tuple[ValueInfoProto, NodeProto]: + pull_weight_trigger_output_shape: list[int], +) -> tuple[ValueInfoProto, NodeProto]: """This function is used to create a weight retrieving PythonOp.""" offload_param_count = 0 if zero_stage3_named_params is None else len(zero_stage3_named_params) new_input = helper.make_tensor_value_info( @@ -417,7 +416,7 @@ def stage3_export_context(enable: bool, stage3_param_handle, flattened_module): from torch.onnx._internal import _beartype @_beartype.beartype - def _get_tensor_rank(x) -> Optional[int]: + def _get_tensor_rank(x) -> int | None: ### 
Adapted from https://github.com/pytorch/pytorch/blob/185515368bcd7d94ac06ab1634f22b747b03c6d9/torch/onnx/symbolic_helper.py#L561 # Retrieve the real rank for the stage3 weights, because stage3 weights are all (0). from typing import cast as typing_cast diff --git a/orttraining/orttraining/python/training/ortmodule/experimental/json_config/_load_config_from_json.py b/orttraining/orttraining/python/training/ortmodule/experimental/json_config/_load_config_from_json.py index 76c8ce3bf3220..7cda02952405b 100644 --- a/orttraining/orttraining/python/training/ortmodule/experimental/json_config/_load_config_from_json.py +++ b/orttraining/orttraining/python/training/ortmodule/experimental/json_config/_load_config_from_json.py @@ -59,9 +59,9 @@ def _load_use_external_gpu_allocator(ortmodule_config_accessor, data): assert hasattr(data, _load_use_external_gpu_allocator.loading_key) log.info(f"Found keyword {_load_use_external_gpu_allocator.loading_key} in json. Loading attributes from file.") - assert isinstance( - data.UseExternalGPUAllocator, bool - ), f"{_load_use_external_gpu_allocator.loading_key} must be a boolean" + assert isinstance(data.UseExternalGPUAllocator, bool), ( + f"{_load_use_external_gpu_allocator.loading_key} must be a boolean" + ) ortmodule_config_accessor._runtime_options.use_external_gpu_allocator = data.UseExternalGPUAllocator @@ -73,9 +73,9 @@ def _load_enable_custom_autograd_function(ortmodule_config_accessor, data): f"Found keyword {_load_enable_custom_autograd_function.loading_key} in json. Loading attributes from file." ) - assert isinstance( - data.EnableCustomAutogradFunction, bool - ), f"{_load_enable_custom_autograd_function.loading_key} must be a boolean" + assert isinstance(data.EnableCustomAutogradFunction, bool), ( + f"{_load_enable_custom_autograd_function.loading_key} must be a boolean" + ) from onnxruntime.training.ortmodule._custom_autograd_function import enable_custom_autograd_support @@ -89,9 +89,9 @@ def _load_enable_grad_acc_optimization(ortmodule_config_accessor, data): assert hasattr(data, _load_enable_grad_acc_optimization.loading_key) log.info(f"Found keyword {_load_enable_grad_acc_optimization.loading_key} in json. Loading attributes from file.") - assert isinstance( - data.EnableGradAccOptimization, bool - ), f"{_load_enable_grad_acc_optimization.loading_key} must be a boolean" + assert isinstance(data.EnableGradAccOptimization, bool), ( + f"{_load_enable_grad_acc_optimization.loading_key} must be a boolean" + ) ortmodule_config_accessor._runtime_options.enable_grad_acc_optimization = data.EnableGradAccOptimization @@ -101,9 +101,9 @@ def _load_run_symbolic_shape_infer(ortmodule_config_accessor, data): assert hasattr(data, _load_run_symbolic_shape_infer.loading_key) log.info(f"Found keyword {_load_run_symbolic_shape_infer.loading_key} in json. Loading attributes from file.") - assert isinstance( - data.RunSymbolicShapeInference, bool - ), f"{_load_run_symbolic_shape_infer.loading_key} must be a boolean" + assert isinstance(data.RunSymbolicShapeInference, bool), ( + f"{_load_run_symbolic_shape_infer.loading_key} must be a boolean" + ) ortmodule_config_accessor._runtime_options.run_symbolic_shape_infer = data.RunSymbolicShapeInference @@ -175,9 +175,9 @@ def _load_use_memory_efficient_gradient(ortmodule_config_accessor, data): assert hasattr(data, _load_use_memory_efficient_gradient.loading_key) log.info(f"Found keyword {_load_use_memory_efficient_gradient.loading_key} in json. 
Loading attributes from file.") - assert isinstance( - data.UseMemoryEfficientGradient, bool - ), f"{_load_use_memory_efficient_gradient.loading_key} must be a boolean" + assert isinstance(data.UseMemoryEfficientGradient, bool), ( + f"{_load_use_memory_efficient_gradient.loading_key} must be a boolean" + ) ortmodule_config_accessor._runtime_options.use_memory_efficient_gradient = data.UseMemoryEfficientGradient diff --git a/orttraining/orttraining/python/training/ortmodule/graph_optimizer_registry.py b/orttraining/orttraining/python/training/ortmodule/graph_optimizer_registry.py index 897ecac148bfb..fa4c6dd04d81b 100644 --- a/orttraining/orttraining/python/training/ortmodule/graph_optimizer_registry.py +++ b/orttraining/orttraining/python/training/ortmodule/graph_optimizer_registry.py @@ -3,7 +3,7 @@ # Licensed under the MIT License. # -------------------------------------------------------------------------- -from typing import Callable +from collections.abc import Callable from onnx.onnx_ml_pb2 import GraphProto diff --git a/orttraining/orttraining/python/training/ortmodule/graph_optimizers/_aten_attn.py b/orttraining/orttraining/python/training/ortmodule/graph_optimizers/_aten_attn.py index c1fb6e68568f5..b5e5ae45f3631 100644 --- a/orttraining/orttraining/python/training/ortmodule/graph_optimizers/_aten_attn.py +++ b/orttraining/orttraining/python/training/ortmodule/graph_optimizers/_aten_attn.py @@ -15,8 +15,6 @@ support if we want to try in the future. """ -from typing import List, Tuple - from onnx import GraphProto, NodeProto, TensorProto, helper from ..graph_optimizer_registry import register_graph_optimizer @@ -125,7 +123,7 @@ def _make_efficient_attention_nodes( # Without causal mask, with Dropout. For example, BERT model in HuggingFace. -_PATTERN_0: List[Tuple[str, bool, List[Tuple[int, int, int]]]] = [ +_PATTERN_0: list[tuple[str, bool, list[tuple[int, int, int]]]] = [ ("MatMul", False, []), # 0 ("Transpose", True, [(0, 0, 0)]), # 1 ("Transpose", True, [(0, 0, 1)]), # 2 @@ -152,7 +150,7 @@ def _make_efficient_attention_nodes( ] -def _optimize_for_pattern_0(matcher: GraphMatcher, idx: int, nodes: List[NodeProto]): +def _optimize_for_pattern_0(matcher: GraphMatcher, idx: int, nodes: list[NodeProto]): # Check forward only as the backward is expected to be consistent if it's built correctly. scale_value = matcher.get_constant_value(nodes[3].input[1]) ratio_value = matcher.get_constant_value(nodes[6].input[1]) @@ -188,7 +186,7 @@ def _optimize_for_pattern_0(matcher: GraphMatcher, idx: int, nodes: List[NodePro # Without causal mask, without Dropout. For example, BERT model and disabling attention dropout in HuggingFace. -_PATTERN_1: List[Tuple[str, bool, List[Tuple[int, int, int]]]] = [ +_PATTERN_1: list[tuple[str, bool, list[tuple[int, int, int]]]] = [ ("MatMul", False, []), # 0 ("Transpose", True, [(0, 0, 0)]), # 1 ("Transpose", True, [(0, 0, 1)]), # 2 @@ -213,7 +211,7 @@ def _optimize_for_pattern_0(matcher: GraphMatcher, idx: int, nodes: List[NodePro ] -def _optimize_for_pattern_1(matcher: GraphMatcher, idx: int, nodes: List[NodeProto]): +def _optimize_for_pattern_1(matcher: GraphMatcher, idx: int, nodes: list[NodeProto]): # Check forward only as the backward is expected to be consistent if it's built correctly. 
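In the `graph_optimizers/utils.py` hunk just below, `update_graph()` keeps `new_value_infos: list[TensorProto] = []` together with its `# noqa: B006`. Bugbear's B006 flags mutable defaults because the default object is created once, at definition time, and is then shared by every call that omits the argument; presumably that is harmless here only because the function never mutates the default in place. The failure mode it guards against, in miniature:

    def collect(item, bucket=[]):  # noqa: B006 - deliberately wrong, to show the pitfall
        bucket.append(item)
        return bucket

    assert collect(1) == [1]
    assert collect(2) == [1, 2]  # surprise: same list object as the first call

    def collect_fixed(item, bucket=None):
        bucket = [] if bucket is None else bucket
        bucket.append(item)
        return bucket

    assert collect_fixed(1) == [1]
    assert collect_fixed(2) == [2]  # fresh list per call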
scale_value = matcher.get_constant_value(nodes[3].input[1]) if not ( diff --git a/orttraining/orttraining/python/training/ortmodule/graph_optimizers/utils.py b/orttraining/orttraining/python/training/ortmodule/graph_optimizers/utils.py index fbd98675aebe6..9089004559923 100644 --- a/orttraining/orttraining/python/training/ortmodule/graph_optimizers/utils.py +++ b/orttraining/orttraining/python/training/ortmodule/graph_optimizers/utils.py @@ -4,7 +4,8 @@ # -------------------------------------------------------------------------- import itertools -from typing import Any, Dict, List, Sequence, Tuple +from collections.abc import Sequence +from typing import Any import numpy as np from onnx import GraphProto, NodeProto, TensorProto, helper, numpy_helper @@ -54,8 +55,8 @@ class GraphMatcher: def __init__(self, graph: GraphProto): self._graph: GraphProto = graph - self._op_type_to_nodes: Dict[str, List[NodeProto]] = {} - self._consumer_count: Dict[str, int] = {} + self._op_type_to_nodes: dict[str, list[NodeProto]] = {} + self._consumer_count: dict[str, int] = {} for node in graph.node: if node.op_type not in self._op_type_to_nodes: self._op_type_to_nodes[node.op_type] = [] @@ -117,7 +118,7 @@ def get_type_and_shape(self, arg: str): return initializers[0].data_type, initializers[0].dims return None, None - def _match_pattern(self, node: NodeProto, pattern: List[Tuple[str, bool, List[Tuple[int, int, int]]]]): + def _match_pattern(self, node: NodeProto, pattern: list[tuple[str, bool, list[tuple[int, int, int]]]]): nodes = [node] for i in range(1, len(pattern)): next_op_type = pattern[i][0] @@ -140,7 +141,7 @@ def _match_pattern(self, node: NodeProto, pattern: List[Tuple[str, bool, List[Tu nodes.append(next_node) return nodes - def match_pattern(self, pattern: List[Tuple[str, bool, List[Tuple[int, int, int]]]]): + def match_pattern(self, pattern: list[tuple[str, bool, list[tuple[int, int, int]]]]): for node in self._op_type_to_nodes.get(pattern[0][0], []): result = self._match_pattern(node, pattern) if len(result) == len(pattern): @@ -165,9 +166,9 @@ def make_constant_node(name: str, dtype: TensorProto.DataType, dims: Sequence[in def update_graph( graph: GraphProto, - nodes_to_remove: List[NodeProto], - nodes_to_add: List[NodeProto], - new_value_infos: List[TensorProto] = [], # noqa: B006 + nodes_to_remove: list[NodeProto], + nodes_to_add: list[NodeProto], + new_value_infos: list[TensorProto] = [], # noqa: B006 ): """Update an ONNX graph by removing some nodes, and adding some new nodes and value infos.""" nodes = [node for node in graph.node if node not in nodes_to_remove] diff --git a/orttraining/orttraining/python/training/ortmodule/ortmodule.py b/orttraining/orttraining/python/training/ortmodule/ortmodule.py index b291bfb2ba03c..a7942eea5be26 100644 --- a/orttraining/orttraining/python/training/ortmodule/ortmodule.py +++ b/orttraining/orttraining/python/training/ortmodule/ortmodule.py @@ -18,7 +18,9 @@ from onnxruntime.tools import pytorch_export_contrib_ops import torch -from typing import Iterator, Optional, OrderedDict, Tuple, TypeVar, Callable +from typing import TypeVar +from collections import OrderedDict +from collections.abc import Iterator, Callable # Needed to override PyTorch methods T = TypeVar("T", bound="torch.nn.Module") @@ -35,7 +37,7 @@ class ORTModule(torch.nn.Module): debug_options (:obj:`DebugOptions`, optional): debugging options for ORTModule. 
""" - def __init__(self, module: torch.nn.Module, debug_options: Optional[DebugOptions] = None): + def __init__(self, module: torch.nn.Module, debug_options: DebugOptions | None = None): # NOTE: torch.nn.Modules that call setattr on their internal attributes regularly # (for example PyTorch Lightning), will trigger regular re-exports. This is # because ORTModule auto detects such setattrs on the original module and @@ -154,7 +156,7 @@ def _replicate_for_data_parallel(self): return self._torch_module._replicate_for_data_parallel() - def add_module(self, name: str, module: Optional[torch.nn.Module]) -> None: + def add_module(self, name: str, module: torch.nn.Module | None) -> None: """Raises a ORTModuleTorchModelException exception since ORTModule does not support adding modules to it""" self._torch_module.add_module(name, module) @@ -217,12 +219,12 @@ def load_state_dict(self, state_dict: "OrderedDict[str, torch.Tensor]", strict: return self._torch_module.load_state_dict(state_dict, strict=strict) - def register_buffer(self, name: str, tensor: Optional[torch.Tensor], persistent: bool = True) -> None: + def register_buffer(self, name: str, tensor: torch.Tensor | None, persistent: bool = True) -> None: """Override :meth:`~torch.nn.Module.register_buffer`""" self._torch_module.register_buffer(name, tensor, persistent=persistent) - def register_parameter(self, name: str, param: Optional[torch.nn.Parameter]) -> None: + def register_parameter(self, name: str, param: torch.nn.Parameter | None) -> None: """Override :meth:`~torch.nn.Module.register_parameter`""" self._torch_module.register_parameter(name, param) @@ -242,7 +244,7 @@ def parameters(self, recurse: bool = True) -> Iterator[torch.nn.Parameter]: yield from self._torch_module.parameters(recurse=recurse) - def named_parameters(self, prefix: str = "", recurse: bool = True) -> Iterator[Tuple[str, torch.nn.Parameter]]: + def named_parameters(self, prefix: str = "", recurse: bool = True) -> Iterator[tuple[str, torch.nn.Parameter]]: """Override :meth:`~torch.nn.Module.named_parameters`""" yield from self._torch_module.named_parameters(prefix=prefix, recurse=recurse) @@ -252,7 +254,7 @@ def buffers(self, recurse: bool = True) -> Iterator[torch.Tensor]: yield from self._torch_module.buffers(recurse=recurse) - def named_buffers(self, prefix: str = "", recurse: bool = True) -> Iterator[Tuple[str, torch.Tensor]]: + def named_buffers(self, prefix: str = "", recurse: bool = True) -> Iterator[tuple[str, torch.Tensor]]: """Override :meth:`~torch.nn.Module.named_buffers`""" yield from self._torch_module.named_buffers(prefix=prefix, recurse=recurse) @@ -266,7 +268,7 @@ def _load_from_state_dict( state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs ) - def named_children(self) -> Iterator[Tuple[str, torch.nn.Module]]: + def named_children(self) -> Iterator[tuple[str, torch.nn.Module]]: """Override :meth:`~torch.nn.Module.named_children`""" yield from self._torch_module.named_children() diff --git a/orttraining/orttraining/python/training/ortmodule/torch_cpp_extensions/cuda/fused_ops/type_shim.h b/orttraining/orttraining/python/training/ortmodule/torch_cpp_extensions/cuda/fused_ops/type_shim.h index 3d508b80a0c2c..93f6945a264ee 100644 --- a/orttraining/orttraining/python/training/ortmodule/torch_cpp_extensions/cuda/fused_ops/type_shim.h +++ b/orttraining/orttraining/python/training/ortmodule/torch_cpp_extensions/cuda/fused_ops/type_shim.h @@ -51,7 +51,7 @@ __device__ __forceinline__ T reduce_block_into_lanes(T* x, T val, int 
lanes = 1, final = x[tid] + x[tid + 32]; else final = val; - // __SYNCWARP(); + // __SYNCWARP(); #pragma unroll #if defined(CUDA_VERSION) && CUDA_VERSION >= 9000 diff --git a/orttraining/orttraining/python/training/utils/__init__.py b/orttraining/orttraining/python/training/utils/__init__.py index ecfb7d7907f3c..d4d6bf3134809 100644 --- a/orttraining/orttraining/python/training/utils/__init__.py +++ b/orttraining/orttraining/python/training/utils/__init__.py @@ -24,17 +24,17 @@ ) __all__ = [ - "PrimitiveType", - "ORTModelInputOutputType", "ORTModelInputOutputSchemaType", + "ORTModelInputOutputType", + "PTable", + "PrimitiveType", "extract_data_and_schema", - "unflatten_data_using_schema", - "torch_nvtx_range_push", - "torch_nvtx_range_pop", - "nvtx_function_decorator", "log_memory_usage", - "pytorch_type_to_onnx_dtype", + "nvtx_function_decorator", "onnx_dtype_to_pytorch_dtype", "pytorch_scalar_type_to_pytorch_dtype", - "PTable", + "pytorch_type_to_onnx_dtype", + "torch_nvtx_range_pop", + "torch_nvtx_range_push", + "unflatten_data_using_schema", ] diff --git a/orttraining/orttraining/python/training/utils/data/sampler.py b/orttraining/orttraining/python/training/utils/data/sampler.py index afc4d360b1582..8dfe576714609 100644 --- a/orttraining/orttraining/python/training/utils/data/sampler.py +++ b/orttraining/orttraining/python/training/utils/data/sampler.py @@ -3,7 +3,7 @@ # sampler.py import math -from typing import Callable, Iterator, Optional +from collections.abc import Callable, Iterator import numpy as np import torch @@ -106,10 +106,10 @@ def __init__( self, dataset: Dataset, complexity_fn: Callable[..., int], - world_size: Optional[int] = None, - rank: Optional[int] = None, + world_size: int | None = None, + rank: int | None = None, shuffle: bool = True, - group_size: Optional[int] = None, + group_size: int | None = None, seed: int = 0, drop_last: bool = False, random_level: float = 0, diff --git a/orttraining/orttraining/python/training/utils/hooks/__init__.py b/orttraining/orttraining/python/training/utils/hooks/__init__.py index 89c0d44abbb7a..8ff36a7eee2e7 100644 --- a/orttraining/orttraining/python/training/utils/hooks/__init__.py +++ b/orttraining/orttraining/python/training/utils/hooks/__init__.py @@ -7,11 +7,11 @@ import torch __all__ = [ - "StatisticsSubscriber", "GlobalSubscriberManager", - "inspect_activation", + "StatisticsSubscriber", "ZeROOffloadSubscriber", "configure_ort_compatible_zero_stage3", + "inspect_activation", ] from ._statistics_subscriber import StatisticsSubscriber, _InspectActivation diff --git a/orttraining/orttraining/python/training/utils/hooks/_statistics_subscriber.py b/orttraining/orttraining/python/training/utils/hooks/_statistics_subscriber.py index a8e730488d76d..d466faddf91bc 100644 --- a/orttraining/orttraining/python/training/utils/hooks/_statistics_subscriber.py +++ b/orttraining/orttraining/python/training/utils/hooks/_statistics_subscriber.py @@ -8,7 +8,6 @@ import warnings from io import TextIOWrapper from pathlib import Path -from typing import List, Optional, Tuple, Union import onnx import torch @@ -29,7 +28,7 @@ class _InspectActivation(torch.autograd.Function): def forward( ctx, activation_name: str, - module_idx: Optional[int], + module_idx: int | None, run_ctx: RuntimeStates, input_tensor: torch.Tensor, module_post_forward, @@ -89,9 +88,9 @@ def backward(ctx, grad_output: torch.Tensor): @staticmethod def infer_shape( node: onnx.NodeProto, - tensor_input_shapes: List[Optional[List[Union[int, str]]]], - tensor_input_dtypes: 
List[torch.onnx.TensorProtoDataType], - ) -> Tuple[List[Optional[List[Union[int, str]]]], List[torch.onnx.TensorProtoDataType]]: + tensor_input_shapes: list[list[int | str] | None], + tensor_input_dtypes: list[torch.onnx.TensorProtoDataType], + ) -> tuple[list[list[int | str] | None], list[torch.onnx.TensorProtoDataType]]: return tensor_input_shapes, tensor_input_dtypes @staticmethod @@ -124,8 +123,8 @@ class StatisticsSubscriber(SubscriberBase): def __init__( self, output_dir: str, - start_step: Union[None, int] = None, - end_step: Union[None, int] = None, + start_step: None | int = None, + end_step: None | int = None, override_output_dir: bool = False, run_on_cpu: bool = False, bucket_size: int = 1024 * 1024 * 1024 // 2, @@ -278,11 +277,11 @@ def _summarize_tensor( std_value = torch.sqrt(s.sum() / (element_count - 1)) f.write( - f"{'>'*max(0, depth) + display_name} shape: {tensor_shape} dtype: {tensor_dtype} size: {flatten_array.size()} \n" + f"{'>' * max(0, depth) + display_name} shape: {tensor_shape} dtype: {tensor_dtype} size: {flatten_array.size()} \n" f"min: {min_value} max: {max_value}, mean: {mean_value}, " f"std: {std_value} \n" f"nan: {num_nan}, inf: {num_inf}\n" ) f.write(f"samples(top 128): {flatten_array[:128]}\n") f.write(f"neg: {num_neg}, pos: {num_pos}, zero: {num_zero},\n") - f.write(f"{'='*16}\n") + f.write(f"{'=' * 16}\n") diff --git a/orttraining/orttraining/python/training/utils/hooks/_subscriber_base.py b/orttraining/orttraining/python/training/utils/hooks/_subscriber_base.py index 1b9a6fc91ec3c..05c58b86b993f 100644 --- a/orttraining/orttraining/python/training/utils/hooks/_subscriber_base.py +++ b/orttraining/orttraining/python/training/utils/hooks/_subscriber_base.py @@ -5,7 +5,6 @@ import sys -from typing import Optional, Tuple import torch @@ -52,7 +51,7 @@ class SubscriberBase: With this, the overall flow can be traced as a data flow graph (DAG). """ - def __init__(self, start_step: Optional[int], end_step: Optional[int]): + def __init__(self, start_step: int | None, end_step: int | None): """ Steps in [start_step, end_step) will run the subscriber's actions, and other steps will skip. If start_step is None, 0 is given; if end_step is None, sys.maxsize is given. @@ -66,7 +65,7 @@ def pre_forward_module_apply( module: torch.nn.Module, args: ORTModelInputOutputType, kwargs: ORTModelInputOutputType, - ) -> Tuple[ORTModelInputOutputType, ORTModelInputOutputType]: + ) -> tuple[ORTModelInputOutputType, ORTModelInputOutputType]: """This function is called inside the nn.Module's pre-forward hook. Args: @@ -91,7 +90,7 @@ def pre_forward_module_apply_impl( module: torch.nn.Module, args: ORTModelInputOutputType, kwargs: ORTModelInputOutputType, - ) -> Tuple[ORTModelInputOutputType, ORTModelInputOutputType]: + ) -> tuple[ORTModelInputOutputType, ORTModelInputOutputType]: return args, kwargs def pre_forward_tensor_apply( @@ -121,7 +120,7 @@ def post_forward_module_apply( module: torch.nn.Module, args: ORTModelInputOutputType, outputs: ORTModelInputOutputType, - ) -> Tuple[ORTModelInputOutputType, ORTModelInputOutputType]: + ) -> tuple[ORTModelInputOutputType, ORTModelInputOutputType]: """This function is called inside the nn.Module's post-forward hook. 
Args: @@ -146,7 +145,7 @@ def post_forward_module_apply_impl( module: torch.nn.Module, args: ORTModelInputOutputType, outputs: ORTModelInputOutputType, - ) -> Tuple[ORTModelInputOutputType, ORTModelInputOutputType]: + ) -> tuple[ORTModelInputOutputType, ORTModelInputOutputType]: return args, outputs def post_forward_tensor_apply( @@ -179,7 +178,7 @@ def post_forward_outmost_module_apply( module: torch.nn.Module, args: ORTModelInputOutputType, outputs: ORTModelInputOutputType, - ) -> Tuple[ORTModelInputOutputType, ORTModelInputOutputType]: + ) -> tuple[ORTModelInputOutputType, ORTModelInputOutputType]: """This function is called inside the outmost nn.Module's post-forward hook. Args: @@ -204,7 +203,7 @@ def post_forward_outmost_module_apply_impl( module: torch.nn.Module, args: ORTModelInputOutputType, outputs: ORTModelInputOutputType, - ) -> Tuple[ORTModelInputOutputType, ORTModelInputOutputType]: + ) -> tuple[ORTModelInputOutputType, ORTModelInputOutputType]: return args, outputs def _need_skip_step(self, current_step: int) -> bool: diff --git a/orttraining/orttraining/python/training/utils/hooks/_subscriber_manager.py b/orttraining/orttraining/python/training/utils/hooks/_subscriber_manager.py index c9c06dabab4de..c41f5078b20d7 100644 --- a/orttraining/orttraining/python/training/utils/hooks/_subscriber_manager.py +++ b/orttraining/orttraining/python/training/utils/hooks/_subscriber_manager.py @@ -6,7 +6,6 @@ import inspect from contextlib import contextmanager -from typing import List, Optional, Set, Tuple, Union import onnx import torch @@ -40,7 +39,7 @@ class _IncrementStep(torch.autograd.Function): """ @staticmethod - def forward(ctx, run_ctx: RuntimeStates, *input_tensor_list: Tuple[torch.Tensor, ...]) -> Tuple[torch.Tensor, ...]: + def forward(ctx, run_ctx: RuntimeStates, *input_tensor_list: tuple[torch.Tensor, ...]) -> tuple[torch.Tensor, ...]: """Make sure there is the same number of `tensor` inputs and outputs. This is enforced by ORT's PythonOp's schema check. """ @@ -57,15 +56,15 @@ def forward(ctx, run_ctx: RuntimeStates, *input_tensor_list: Tuple[torch.Tensor, return tuple(t.detach().requires_grad_(t.requires_grad) for t in input_tensor_list) @staticmethod - def backward(ctx, *grad_output: Tuple[Optional[torch.Tensor], ...]) -> Tuple[Optional[torch.Tensor], ...]: + def backward(ctx, *grad_output: tuple[torch.Tensor | None, ...]) -> tuple[torch.Tensor | None, ...]: return (None, *tuple(g for g in grad_output)) @staticmethod def infer_shape( node: onnx.NodeProto, - tensor_input_shapes: List[Optional[List[Union[int, str]]]], - tensor_input_dtypes: List[torch.onnx.TensorProtoDataType], - ) -> Tuple[List[Optional[List[Union[int, str]]]], List[torch.onnx.TensorProtoDataType]]: + tensor_input_shapes: list[list[int | str] | None], + tensor_input_dtypes: list[torch.onnx.TensorProtoDataType], + ) -> tuple[list[list[int | str] | None], list[torch.onnx.TensorProtoDataType]]: return tensor_input_shapes, tensor_input_dtypes @staticmethod @@ -104,11 +103,11 @@ class SubscriberManager: def __init__(self): self._run_ctx = RuntimeStates() - self._subscribers: Set[SubscriberBase] = set() + self._subscribers: set[SubscriberBase] = set() self._pre_forward_hooks = [] self._post_forward_hooks = [] - def subscribe(self, module: torch.nn.Module, subscribers: List[SubscriberBase]): + def subscribe(self, module: torch.nn.Module, subscribers: list[SubscriberBase]): """ The API is called externally to register hooks that are implicitly defined by subscribers. 
Each time all global states will be cleaned up once called. @@ -192,7 +191,7 @@ def _post_forward_outmost_module_hook(module, module_inputs, module_outputs): module.register_forward_hook(_post_forward_outmost_module_hook) def _initialize_one_time_global_states(self, module: torch.nn.Module): - def _reset_recursively(module: torch.nn.Module, depth: int, next_module_index: List[int]): + def _reset_recursively(module: torch.nn.Module, depth: int, next_module_index: list[int]): """ Called to register hooks for every `torch.nn.Module`. Due to `Module` can contain child `Module`s, this function is called recursively by passing in `next_module_index` - a list of int to maintain a @@ -219,7 +218,7 @@ def _reset_recursively(module: torch.nn.Module, depth: int, next_module_index: L next_module_index = [0] _reset_recursively(module, 1, next_module_index) - def _register_hooks_recursively(self, module: torch.nn.Module, depth: int, next_module_index: List[int]): + def _register_hooks_recursively(self, module: torch.nn.Module, depth: int, next_module_index: list[int]): """Register hooks for every `torch.nn.Module`. Due to `Module` can contain child `Module`s, this function is called recursively by passing in `next_module_index` - a list of int to maintain a global incremental unique module id. diff --git a/orttraining/orttraining/python/training/utils/hooks/_zero_offload_subscriber.py b/orttraining/orttraining/python/training/utils/hooks/_zero_offload_subscriber.py index d4b9768116e92..6b04d38f03fb7 100644 --- a/orttraining/orttraining/python/training/utils/hooks/_zero_offload_subscriber.py +++ b/orttraining/orttraining/python/training/utils/hooks/_zero_offload_subscriber.py @@ -7,9 +7,10 @@ import inspect import warnings from collections import OrderedDict +from collections.abc import Callable from datetime import timedelta from types import CodeType, FunctionType -from typing import Any, Callable, Dict, List, Optional, Tuple, Union +from typing import Any import onnx import torch @@ -80,7 +81,7 @@ def source_rank(self) -> int: def _source_rank(self) -> int: return 0 - def result(self) -> List[torch.Tensor]: + def result(self) -> list[torch.Tensor]: return [] def synchronize(self): @@ -177,7 +178,7 @@ def configure_ort_compatible_zero_stage3(debug=False, stats_output_dir=None, sta @nvtx_function_decorator -def _get_params_for_current_module(module: torch.nn.Module) -> List[torch.nn.parameter.Parameter]: +def _get_params_for_current_module(module: torch.nn.Module) -> list[torch.nn.parameter.Parameter]: """Retrieve the parameters for this module. Logic adapted from @@ -186,13 +187,13 @@ def _get_params_for_current_module(module: torch.nn.Module) -> List[torch.nn.par from deepspeed.runtime.zero.partitioned_param_coordinator import iter_params # Retrieve all parameters for this module. - partitioned_params = [param for param in iter_params(module)] + partitioned_params = list(iter_params(module)) return partitioned_params @nvtx_function_decorator -def _get_all_zero_stage3_params(module: torch.nn.Module) -> Dict[str, torch.nn.parameter.Parameter]: +def _get_all_zero_stage3_params(module: torch.nn.Module) -> dict[str, torch.nn.parameter.Parameter]: """Retrieve all the parameters that are offloaded.""" from deepspeed.runtime.zero.partition_parameters import ZeroParamStatus @@ -205,7 +206,7 @@ def _get_all_zero_stage3_params(module: torch.nn.Module) -> Dict[str, torch.nn.p # Used to cache the map avoid repeated loop up (X us) overhead during training. 
-_ModuleToParametersRefs: Dict[torch.nn.Module, List[torch.nn.parameter.Parameter]] = OrderedDict() +_ModuleToParametersRefs: dict[torch.nn.Module, list[torch.nn.parameter.Parameter]] = OrderedDict() class ORTZeROOffloadPreForwardFunction(torch.autograd.Function): @@ -295,7 +296,7 @@ def backward(ctx, *grads): # completing the full backward propagation, will not affect parameter updates. passed_in_param_grad = [ torch.zeros(shape, dtype=dtype, device=device) - for shape, dtype, device in zip(ctx.shapes, ctx.dtypes, ctx.devices) + for shape, dtype, device in zip(ctx.shapes, ctx.dtypes, ctx.devices, strict=False) ] zero_grads = updated_grads[:input_count] + tuple(passed_in_param_grad) @@ -306,9 +307,9 @@ def backward(ctx, *grads): @staticmethod def infer_shape( node: onnx.NodeProto, - tensor_input_shapes: List[Optional[List[Union[int, str]]]], - tensor_input_dtypes: List[torch.onnx.TensorProtoDataType], - ) -> Tuple[List[Optional[List[Union[int, str]]]], List[torch.onnx.TensorProtoDataType]]: + tensor_input_shapes: list[list[int | str] | None], + tensor_input_dtypes: list[torch.onnx.TensorProtoDataType], + ) -> tuple[list[list[int | str] | None], list[torch.onnx.TensorProtoDataType]]: input_pointer_scalars_attr_name = "input_pointer_scalars" found = [attr for attr in node.attribute if attr.name == input_pointer_scalars_attr_name] assert len(found) == 1 @@ -414,9 +415,9 @@ def backward(ctx, *grads): @staticmethod def infer_shape( node: onnx.NodeProto, - tensor_input_shapes: List[Optional[List[Union[int, str]]]], - tensor_input_dtypes: List[torch.onnx.TensorProtoDataType], - ) -> Tuple[List[Optional[List[Union[int, str]]]], List[torch.onnx.TensorProtoDataType]]: + tensor_input_shapes: list[list[int | str] | None], + tensor_input_dtypes: list[torch.onnx.TensorProtoDataType], + ) -> tuple[list[list[int | str] | None], list[torch.onnx.TensorProtoDataType]]: return tensor_input_shapes, tensor_input_dtypes @staticmethod @@ -480,7 +481,7 @@ def pre_forward_module_apply_impl( module: torch.nn.Module, args: ORTModelInputOutputType, kwargs: ORTModelInputOutputType, - ) -> Tuple[ORTModelInputOutputType, ORTModelInputOutputType]: + ) -> tuple[ORTModelInputOutputType, ORTModelInputOutputType]: """This function is a dispatcher to call DeepSpeed stage3 pre forward hooks in sequence. All hook functions can be retrieved from the function store, due to exporter only supports a list of tensors as @@ -556,7 +557,7 @@ def post_forward_module_apply_impl( module: torch.nn.Module, args: ORTModelInputOutputType, outputs: ORTModelInputOutputType, - ) -> Tuple[ORTModelInputOutputType, ORTModelInputOutputType]: + ) -> tuple[ORTModelInputOutputType, ORTModelInputOutputType]: """This function is a dispatcher to call DeepSpeed stage3 post forward hooks in sequence. 
All hook functions can be retrieved from function store, due to exporter only supports a list of tensors as @@ -615,7 +616,7 @@ def post_forward_outmost_module_apply_impl( module: torch.nn.Module, args: ORTModelInputOutputType, outputs: ORTModelInputOutputType, - ) -> Tuple[ORTModelInputOutputType, ORTModelInputOutputType]: + ) -> tuple[ORTModelInputOutputType, ORTModelInputOutputType]: outputs_tensors, outputs_schema = extract_data_and_schema(outputs) _end_of_forward_hook = self._functions.get("_end_of_forward_hook") @@ -636,7 +637,7 @@ def post_forward_outmost_module_apply_impl( return args, updated_outputs @nvtx_function_decorator - def _check_all_tensor(self, tensor_list: Tuple[torch.Tensor], module: torch.nn.Module, name: str): + def _check_all_tensor(self, tensor_list: tuple[torch.Tensor], module: torch.nn.Module, name: str): if not self._enable_debug_info: return diff --git a/orttraining/orttraining/python/training/utils/ptable.py b/orttraining/orttraining/python/training/utils/ptable.py index 5e06864800666..c3e022f252e13 100644 --- a/orttraining/orttraining/python/training/utils/ptable.py +++ b/orttraining/orttraining/python/training/utils/ptable.py @@ -3,14 +3,12 @@ # Licensed under the MIT License. # -------------------------------------------------------------------------- -from typing import List - class Row: """A row in a PTable""" - def __init__(self, columns: List[str]) -> None: - self._columns: List[str] = columns # List of strings + def __init__(self, columns: list[str]) -> None: + self._columns: list[str] = columns # List of strings self._annotation_table = None # Optional PTable used for displaying detailed information about the feature row. def append_annotation_table(self, ptable) -> None: @@ -21,11 +19,11 @@ class PTable: """A table that can be printed to the console.""" def __init__(self, sortable=False) -> None: - self._rows: List[Row] = [] + self._rows: list[Row] = [] self._column_count = None self._sortable = sortable # allow the rows to be sorted by the first column - def add_row(self, columns: List[str]) -> Row: + def add_row(self, columns: list[str]) -> Row: """Add a row to the table. The number of columns must match the number of columns in the table.""" if self._column_count is None: self._column_count = len(columns) diff --git a/orttraining/orttraining/python/training/utils/torch_io_helper.py b/orttraining/orttraining/python/training/utils/torch_io_helper.py index 4824ed7137021..f0cf09d91b81e 100644 --- a/orttraining/orttraining/python/training/utils/torch_io_helper.py +++ b/orttraining/orttraining/python/training/utils/torch_io_helper.py @@ -6,7 +6,7 @@ import copy import warnings from collections import OrderedDict, abc -from typing import List, Mapping, Optional, Sequence, Tuple, Union +from collections.abc import Mapping, Sequence import torch @@ -37,36 +37,36 @@ def get_primitive_dtype(value): # Data types supported as model inputs and outputs. 
-ORTModelInputOutputType = Union[ - None, - str, - int, - bool, - float, - torch.Tensor, - Sequence["ORTModelInputOutputType"], - Mapping[str, "ORTModelInputOutputType"], -] +ORTModelInputOutputType = ( + str + | int + | bool + | float + | torch.Tensor + | Sequence["ORTModelInputOutputType"] + | Mapping[str, "ORTModelInputOutputType"] + | None +) class _TensorStub: """Tensor stub class used to represent model's input or output""" - __slots__ = ["tensor_idx", "name", "dtype", "shape", "shape_dims"] + __slots__ = ["dtype", "name", "shape", "shape_dims", "tensor_idx"] def __init__( self, tensor_idx: int, - name: Optional[str] = None, - dtype: Optional[str] = None, + name: str | None = None, + dtype: str | None = None, shape=None, - shape_dims: Optional[int] = None, + shape_dims: int | None = None, ): self.tensor_idx = tensor_idx - self.name: Optional[str] = name - self.dtype: Optional[str] = dtype + self.name: str | None = name + self.dtype: str | None = dtype self.shape = shape - self.shape_dims: Optional[int] = shape_dims # r.g. rank. + self.shape_dims: int | None = shape_dims # e.g. rank. def __repr__(self) -> str: result = "_TensorStub(" @@ -108,13 +108,9 @@ def __eq__(self, other): # Data schema used to represent model's input or output. -ORTModelInputOutputSchemaType = Union[ - None, - str, - _TensorStub, - Sequence["ORTModelInputOutputSchemaType"], - Mapping[str, "ORTModelInputOutputSchemaType"], -] +ORTModelInputOutputSchemaType = ( + str | _TensorStub | Sequence["ORTModelInputOutputSchemaType"] | Mapping[str, "ORTModelInputOutputSchemaType"] | None +) def _warn_of_constant_inputs(data): @@ -126,8 +122,8 @@ def _warn_of_constant_inputs(data): @nvtx_function_decorator def extract_data_and_schema( - data: ORTModelInputOutputType, constant_as_tensor=False, device: Optional[torch.device] = None -) -> Tuple[List[torch.Tensor], ORTModelInputOutputSchemaType]: + data: ORTModelInputOutputType, constant_as_tensor=False, device: torch.device | None = None +) -> tuple[list[torch.Tensor], ORTModelInputOutputSchemaType]: """Extract the data schema by replacing every torch.Tensor value with _TensorStub, and return all tensors in a list. @@ -235,7 +231,7 @@ def _flatten_from_data(data: ORTModelInputOutputType, prefix_name: str = ""): @nvtx_function_decorator def unflatten_data_using_schema( - data: List[torch.Tensor], schema: ORTModelInputOutputSchemaType + data: list[torch.Tensor], schema: ORTModelInputOutputSchemaType ) -> ORTModelInputOutputType: """Follows the schema to generate an output that is expected by the user.
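The Python hunks throughout this patch apply one mechanical modernization: `typing.Optional`/`Union`/`List`/`Tuple`/`Dict` give way to PEP 604 union syntax and PEP 585 builtin generics, and abstract types such as `Callable`, `Iterator`, `Mapping`, and `Sequence` are imported from `collections.abc` instead of `typing`. A minimal sketch of the before/after shape of such a change, assuming Python 3.10 or newer (the `make_stub` function and its parameters are illustrative only, not part of the ORT code):

    # Hypothetical example of the typing modernization used across this patch.
    from collections.abc import Sequence  # preferred over typing.Sequence

    def make_stub(name: str | None = None, dims: Sequence[int] | None = None) -> tuple[str, list[int]]:
        # `str | None` replaces Optional[str]; builtin tuple/list replace typing.Tuple/List.
        return name or "unnamed", list(dims or [])

Note that `X | None` inside annotations can be made to parse on 3.8/3.9 via `from __future__ import annotations`, but runtime type aliases such as the `ORTModelInputOutputType` union above still require 3.10, because the `|` expression is evaluated when the module is imported.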
@@ -280,7 +276,7 @@ def unflatten_data_using_schema( """ - def _replace_stub_with_tensor_value(data_schema: ORTModelInputOutputSchemaType, data: List[torch.Tensor]): + def _replace_stub_with_tensor_value(data_schema: ORTModelInputOutputSchemaType, data: list[torch.Tensor]): # Recursively traverse across user_output and replace all _TensorStub # with torch.Tensor values from outputs following output_idx @@ -291,9 +287,9 @@ def _replace_stub_with_tensor_value(data_schema: ORTModelInputOutputSchemaType, elif PrimitiveType.is_primitive_type(data_schema): return data_schema elif isinstance(data_schema, _TensorStub): - assert isinstance( - data[data_schema.tensor_idx], torch.Tensor - ), f"Expecting torch.Tensor, got {type(data[data_schema.tensor_idx])}" + assert isinstance(data[data_schema.tensor_idx], torch.Tensor), ( + f"Expecting torch.Tensor, got {type(data[data_schema.tensor_idx])}" + ) return data[data_schema.tensor_idx] elif isinstance(data_schema, abc.Sequence): sequence_type = type(data_schema) diff --git a/orttraining/orttraining/python/training/utils/torch_type_map.py b/orttraining/orttraining/python/training/utils/torch_type_map.py index 2b429f3fd4f3a..49c3b32fc5037 100644 --- a/orttraining/orttraining/python/training/utils/torch_type_map.py +++ b/orttraining/orttraining/python/training/utils/torch_type_map.py @@ -4,8 +4,6 @@ # -------------------------------------------------------------------------- -from typing import Union - import torch # Mapping from pytorch scalar type to onnx scalar type. @@ -36,7 +34,7 @@ _ONNX_TO_DTYPE = {onnx_dtype: torch_dtype for torch_dtype, onnx_dtype in _DTYPE_TO_ONNX.items()} -def pytorch_type_to_onnx_dtype(dtype_or_scalar_type: Union[torch.dtype, str]) -> torch.onnx.TensorProtoDataType: +def pytorch_type_to_onnx_dtype(dtype_or_scalar_type: torch.dtype | str) -> torch.onnx.TensorProtoDataType: """Converts a pytorch dtype or scalar type string to an onnx dtype. PyTorch type can be either a dtype or a scalar type string. """ diff --git a/orttraining/orttraining/test/external_custom_ops/setup.py b/orttraining/orttraining/test/external_custom_ops/setup.py index 29383e3618346..33e4b8409752b 100644 --- a/orttraining/orttraining/test/external_custom_ops/setup.py +++ b/orttraining/orttraining/test/external_custom_ops/setup.py @@ -50,6 +50,6 @@ def build_extension(self, ext): description="External custom ops example", long_description="", ext_modules=[CMakeExtension("orttrainng_external_custom_ops")], - cmdclass=dict(build_ext=CMakeBuild), + cmdclass={"build_ext": CMakeBuild}, zip_safe=False, ) diff --git a/orttraining/orttraining/test/gradient/gradient_ops_test.cc b/orttraining/orttraining/test/gradient/gradient_ops_test.cc index b81a08e23e3cf..f4083d5b8f933 100644 --- a/orttraining/orttraining/test/gradient/gradient_ops_test.cc +++ b/orttraining/orttraining/test/gradient/gradient_ops_test.cc @@ -1571,7 +1571,9 @@ TEST(GradientCheckerTest, SigmoidGrad) { UnaryOpGradientTest("Sigmoid"); } TEST(GradientCheckerTest, QuickGeluGrad) { // Default alpha = 1.702, relax the tolerance due failure on Win for some seed. - { UnaryOpGradientTest("QuickGelu", kMSDomain, 1, nullptr, nullptr, {}, 5e-2f); } + { + UnaryOpGradientTest("QuickGelu", kMSDomain, 1, nullptr, nullptr, {}, 5e-2f); + } // Silu, alpha = 1.0. 
{ @@ -3354,6 +3356,29 @@ TEST(GradientCheckerTest, ResizeGrad) { TEST(GradientCheckerTest, AtanGrad) { UnaryOpGradientTest("Atan"); } +TEST(GradientCheckerTest, GlobalMaxPoolGrad) { + float max_error; + GradientChecker<float, float, float> gradient_checker; + OpDef op_def{"GlobalMaxPool", kOnnxDomain, 11}; + constexpr float error_tolerance = 1e-3f; + + // globalmaxpool + { + ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {{2, 3, 5, 5}}, {{2, 3, 1, 1}}, &max_error, {}, + /*check_not_have_gradient*/ true, + /*check_not_have_shape_inferencing*/ true)); + EXPECT_IS_TINIER_THAN(max_error, error_tolerance); + } + + // globalmaxpool_precomputed + { + ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {{2, 1, 3, 3}}, {{2, 1, 1, 1}}, &max_error, {}, + /*check_not_have_gradient*/ true, + /*check_not_have_shape_inferencing*/ true)); + EXPECT_IS_TINIER_THAN(max_error, error_tolerance); + } +} + } // namespace test } // namespace onnxruntime diff --git a/orttraining/orttraining/test/python/_test_helpers.py b/orttraining/orttraining/test/python/_test_helpers.py index 65043c10d8a01..1dd304549869d 100644 --- a/orttraining/orttraining/test/python/_test_helpers.py +++ b/orttraining/orttraining/test/python/_test_helpers.py @@ -84,13 +84,18 @@ def _get_name(name): # Depending on calling backward() from which outputs, it's possible that grad of some weights are not calculated. # none_pt_params is to tell what these weights are, so we will not compare the tensors. def assert_gradients_match_and_reset_gradient( - ort_model, pt_model, none_pt_params=[], reset_gradient=True, rtol=1e-04, atol=1e-05 # noqa: B006 + ort_model, + pt_model, + none_pt_params=(), + reset_gradient=True, + rtol=1e-04, + atol=1e-05, ): ort_named_params = list(ort_model.named_parameters()) pt_named_params = list(pt_model.named_parameters()) assert len(ort_named_params) == len(pt_named_params) - for ort_named_param, pt_named_param in zip(ort_named_params, pt_named_params): + for ort_named_param, pt_named_param in zip(ort_named_params, pt_named_params, strict=False): ort_name, ort_param = ort_named_param pt_name, pt_param = pt_named_param @@ -175,7 +180,7 @@ def run_with_ort_on_device(device, model, input_list, label_input, is_eval_mode= def compare_tensor_list(val_list_a, val_list_b): - for val_a, val_b in zip(val_list_a, val_list_b): + for val_a, val_b in zip(val_list_a, val_list_b, strict=False): assert_values_are_close(val_a, val_b, atol=1e-7, rtol=1e-6) diff --git a/orttraining/orttraining/test/python/orttraining_test_dort.py b/orttraining/orttraining/test/python/orttraining_test_dort.py index e57b615de07bb..759af0854145f 100644 --- a/orttraining/orttraining/test/python/orttraining_test_dort.py +++ b/orttraining/orttraining/test/python/orttraining_test_dort.py @@ -162,12 +162,12 @@ def run(fun, seed: torch.Tensor): # ORT result. tensors = run(optimized_elementwise_model, seed) - for tensor, baseline_tensor in zip(tensors, baseline_tensors): + for tensor, baseline_tensor in zip(tensors, baseline_tensors, strict=False): torch.testing.assert_close(tensor, baseline_tensor) - assert ( - len(cached.keys()) == 2 - ), "Should only see two GraphModules so far. One for forward and the other one for backward." + assert len(cached.keys()) == 2, ( + "Should only see two GraphModules so far. One for forward and the other one for backward." + ) for value in cached.values(): assert len(value) == 1, ( "One GraphModule should only be mapped to one ONNX model since " @@ -182,7 +182,7 @@ def run(fun, seed: torch.Tensor): # ORT result.
tensors = run(optimized_elementwise_model, seed) - for tensor, baseline_tensor in zip(tensors, baseline_tensors): + for tensor, baseline_tensor in zip(tensors, baseline_tensors, strict=False): torch.testing.assert_close(tensor, baseline_tensor) # 4 GraphModule's respectively for @@ -369,7 +369,7 @@ def run(model, tensor_x, tensor_y): print(f"MNIST loss: {loss} (pytorch), {loss_new} (ort).") torch.testing.assert_close(loss, loss_new, rtol=1e-2, atol=1e-5) - for grad, grad_new in zip(grads, grads_new): + for grad, grad_new in zip(grads, grads_new, strict=False): torch.testing.assert_close(grad, grad_new) # Run 5 times because ORT runs have side effects and we want to make sure diff --git a/orttraining/orttraining/test/python/orttraining_test_experimental_gradient_graph.py b/orttraining/orttraining/test/python/orttraining_test_experimental_gradient_graph.py index dd26448f0c596..07a9ab3a1d1cf 100644 --- a/orttraining/orttraining/test/python/orttraining_test_experimental_gradient_graph.py +++ b/orttraining/orttraining/test/python/orttraining_test_experimental_gradient_graph.py @@ -92,7 +92,7 @@ def test_save(self): ort_outs = ort_session.run(None, ort_inputs) onnx_output_names = [node.name for node in onnx_model.graph.output] - onnx_name_to_output = dict(zip(onnx_output_names, ort_outs)) + onnx_name_to_output = dict(zip(onnx_output_names, ort_outs, strict=False)) ort_output = onnx_name_to_output["output"] np.testing.assert_allclose(to_numpy(torch_out), ort_output, rtol=1e-03, atol=1e-05) diff --git a/orttraining/orttraining/test/python/orttraining_test_gru.py b/orttraining/orttraining/test/python/orttraining_test_gru.py index c9e22bf7384af..0693b2ada447b 100644 --- a/orttraining/orttraining/test/python/orttraining_test_gru.py +++ b/orttraining/orttraining/test/python/orttraining_test_gru.py @@ -355,7 +355,9 @@ def backward_np( prev_h = ( all_hidden_states[t - 1, 0, idx, :] if t > 0 - else initial_hidden_state[0, idx, :] if initial_hidden_state is not None else 0 + else initial_hidden_state[0, idx, :] + if initial_hidden_state is not None + else 0 ) grad_update_gate = (prev_h - hidden_gate) * grad_h @@ -664,7 +666,7 @@ def test_gru_forward(sequence_length, batch_size, input_size, hidden_size, linea outs_ort = gru.forward_ort(inputs, weights, recurrence_weights, bias, initial_hidden_state) outs_np = gru.forward_np(inputs, weights, recurrence_weights, bias, initial_hidden_state) - for ort_out, np_out in zip(outs_ort, outs_np): + for ort_out, np_out in zip(outs_ort, outs_np, strict=False): assert np.allclose(ort_out, np_out, rtol=1e-03, atol=1e-05) @@ -714,5 +716,5 @@ def test_gru_backward(sequence_length, batch_size, input_size, hidden_size, line grad_final_hidden_state, ) - for ort_out, np_out in zip(outs_ort, outs_np): + for ort_out, np_out in zip(outs_ort, outs_np, strict=False): assert np.allclose(ort_out, np_out, rtol=1e-01, atol=1e-03) diff --git a/orttraining/orttraining/test/python/orttraining_test_hierarchical_ortmodule.py b/orttraining/orttraining/test/python/orttraining_test_hierarchical_ortmodule.py index 655c9def2c66c..ff1c4dc8aad13 100644 --- a/orttraining/orttraining/test/python/orttraining_test_hierarchical_ortmodule.py +++ b/orttraining/orttraining/test/python/orttraining_test_hierarchical_ortmodule.py @@ -200,7 +200,7 @@ def call_backward(y): def call_allclose(y, y_ref): assert type(y) is type(y_ref) if isinstance(y, Iterable): - for ele, ele_ref in zip(y, y_ref): + for ele, ele_ref in zip(y, y_ref, strict=False): torch.allclose(ele, ele_ref) else: torch.allclose(y, y_ref) diff 
--git a/orttraining/orttraining/test/python/orttraining_test_lort.py b/orttraining/orttraining/test/python/orttraining_test_lort.py index ccd06e1a3ab62..3aca181edcfc2 100644 --- a/orttraining/orttraining/test/python/orttraining_test_lort.py +++ b/orttraining/orttraining/test/python/orttraining_test_lort.py @@ -101,7 +101,7 @@ def run(model, device, x, y): print(f"MNIST loss: {loss} (pytorch), {loss_new} (ort).") torch.testing.assert_close(loss.to("lazy"), loss_new, rtol=1e-2, atol=1e-5) - for g, g_new in zip(grads, grads_new): + for g, g_new in zip(grads, grads_new, strict=False): torch.testing.assert_close(g.to("lazy"), g_new) for _ in range(5): diff --git a/orttraining/orttraining/test/python/orttraining_test_lstm.py b/orttraining/orttraining/test/python/orttraining_test_lstm.py index 4debe73951b2f..57fb6c4d1985b 100644 --- a/orttraining/orttraining/test/python/orttraining_test_lstm.py +++ b/orttraining/orttraining/test/python/orttraining_test_lstm.py @@ -480,7 +480,9 @@ def backward_np( grad_forget_gate = grad_c * ( all_cell_states[t - 1, 0, idx, :] if t > 0 - else initial_cell_state[0, idx, :] if initial_cell_state is not None else 0 + else initial_cell_state[0, idx, :] + if initial_cell_state is not None + else 0 ) grad_control_gate = grad_c * input_gate @@ -520,7 +522,9 @@ def backward_np( prev_h = ( all_hidden_states[t - 1, 0, idx, :] if t > 0 - else initial_hidden_state[0, idx, :] if initial_hidden_state is not None else 0 + else initial_hidden_state[0, idx, :] + if initial_hidden_state is not None + else 0 ) grad_recurrence_weights[0, : self._hidden_size, :] += np.dot( np.expand_dims(grad_input_activation, axis=0).T, np.expand_dims(prev_h, axis=0) @@ -549,17 +553,22 @@ def backward_np( grad_peephole_weights[0, : self._hidden_size] += grad_input_activation * ( all_cell_states[t - 1, 0, idx, :] if t > 0 - else initial_cell_state[0, idx, :] if initial_cell_state is not None else 0 + else initial_cell_state[0, idx, :] + if initial_cell_state is not None + else 0 ) grad_peephole_weights[0, self._hidden_size : 2 * self._hidden_size] += ( grad_output_activation * all_cell_states[t, 0, idx, :] ) - grad_peephole_weights[ - 0, 2 * self._hidden_size : 3 * self._hidden_size - ] += grad_forget_activation * ( - all_cell_states[t - 1, 0, idx, :] - if t > 0 - else initial_cell_state[0, idx, :] if initial_cell_state is not None else 0 + grad_peephole_weights[0, 2 * self._hidden_size : 3 * self._hidden_size] += ( + grad_forget_activation + * ( + all_cell_states[t - 1, 0, idx, :] + if t > 0 + else initial_cell_state[0, idx, :] + if initial_cell_state is not None + else 0 + ) ) grad_c = grad_prev_c @@ -858,7 +867,7 @@ def test_lstm_forward(sequence_length, batch_size, input_size, hidden_size): inputs, weights, recurrence_weights, bias, initial_hidden_state, initial_cell_state, peephole_weights ) - for ort_out, np_out in zip(outs_ort, outs_np): + for ort_out, np_out in zip(outs_ort, outs_np, strict=False): assert np.allclose(ort_out, np_out, rtol=1e-03, atol=1e-05) @@ -924,5 +933,5 @@ def test_lstm_backward(sequence_length, batch_size, input_size, hidden_size): grad_final_cell_state, ) - for ort_out, np_out in zip(outs_ort, outs_np): + for ort_out, np_out in zip(outs_ort, outs_np, strict=False): assert np.allclose(ort_out, np_out, rtol=1e-03, atol=1e-05) diff --git a/orttraining/orttraining/test/python/orttraining_test_model_transform.py b/orttraining/orttraining/test/python/orttraining_test_model_transform.py index 6ea81fc6aa089..a320e0b202a8b 100644 --- 
a/orttraining/orttraining/test/python/orttraining_test_model_transform.py +++ b/orttraining/orttraining/test/python/orttraining_test_model_transform.py @@ -3,7 +3,7 @@ def add_name(model): for i, node in enumerate(model.graph.node): - node.name = "%s_%d" % (node.op_type, i) + node.name = f"{node.op_type}_{i}" def find_single_output_node(model, arg): diff --git a/orttraining/orttraining/test/python/orttraining_test_ort_apis_onnxblock.py b/orttraining/orttraining/test/python/orttraining_test_ort_apis_onnxblock.py index 0866d4a411e29..dfc83de198f21 100644 --- a/orttraining/orttraining/test/python/orttraining_test_ort_apis_onnxblock.py +++ b/orttraining/orttraining/test/python/orttraining_test_ort_apis_onnxblock.py @@ -605,7 +605,7 @@ def test_retrieve_parameters(): # Then assert not non_trainable_params - for ort_param, (pt_param_name, pt_param) in zip(trainable_params, pt_model.named_parameters()): + for ort_param, (pt_param_name, pt_param) in zip(trainable_params, pt_model.named_parameters(), strict=False): assert ort_param.name == pt_param_name assert np.allclose( np.frombuffer(ort_param.raw_data, dtype=np.float32).reshape(pt_param.shape), @@ -853,7 +853,7 @@ def mse_loss(prediction, target): ort_outs = ort_session.run([ort_output_names], ort_inputs) # assert all the gradients are close - for ort_grad, pt_param in zip(ort_outs[0], pt_model.parameters()): + for ort_grad, pt_param in zip(ort_outs[0], pt_model.parameters(), strict=False): assert np.allclose(ort_grad, _to_numpy(pt_param.grad)) @@ -1102,7 +1102,6 @@ def test_custom_optimizer_block(): def test_generate_artifacts_path(): - with tempfile.TemporaryDirectory() as temp_dir: _, simple_net = _get_models("cpu", 32, 28, 10, 10) @@ -1159,7 +1158,7 @@ def test_generate_artifacts_external_data_one_file(): assert os.path.exists(os.path.join(temp_dir, "checkpoint")) -@pytest.mark.parametrize("loss", [loss_t for loss_t in artifacts.LossType]) +@pytest.mark.parametrize("loss", list(artifacts.LossType)) def test_generate_artifacts_external_data_separate_files(loss): with tempfile.TemporaryDirectory() as temp_dir: _, simple_net = _get_models("cpu", 32, 28, 10, 10) diff --git a/orttraining/orttraining/test/python/orttraining_test_ort_pipeline_module.py b/orttraining/orttraining/test/python/orttraining_test_ort_pipeline_module.py index d59e32cde33dd..8047e4217c6f9 100644 --- a/orttraining/orttraining/test/python/orttraining_test_ort_pipeline_module.py +++ b/orttraining/orttraining/test/python/orttraining_test_ort_pipeline_module.py @@ -1,5 +1,4 @@ import argparse -from typing import Dict, Tuple import deepspeed import torch @@ -39,14 +38,14 @@ def __init__(self, x: torch.Tensor, y: torch.Tensor): def __len__(self) -> int: return self.x.size(0) - def __getitem__(self, idx: int) -> Tuple[torch.Tensor, torch.Tensor]: + def __getitem__(self, idx: int) -> tuple[torch.Tensor, torch.Tensor]: return self.x[idx], self.y[idx] class SimpleNetPipeInput(nn.Module): """First stage of the pipeline, responsible for initial processing.""" - def __init__(self, config: Dict[str, int]): + def __init__(self, config: dict[str, int]): super().__init__() self.linear = nn.Linear(config["input_size"], config["hidden_size"]) self.activation = nn.ReLU() @@ -60,7 +59,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: class SimpleNetPipeBlock(nn.Module): """Intermediate stage of the pipeline, can be duplicated to deepen the network.""" - def __init__(self, config: Dict[str, int]): + def __init__(self, config: dict[str, int]): super().__init__() self.linear = 
nn.Linear(config["hidden_size"], config["hidden_size"]) self.activation = nn.ReLU() @@ -74,7 +73,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: class SimpleNetPipeOutput(nn.Module): """Final stage of the pipeline, producing the output.""" - def __init__(self, config: Dict[str, int]): + def __init__(self, config: dict[str, int]): super().__init__() self.linear = nn.Linear(config["hidden_size"], config["output_size"]) @@ -83,7 +82,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: return x -def build_model(config: Dict[str, int], n: int, layer_spec: bool) -> nn.Module: +def build_model(config: dict[str, int], n: int, layer_spec: bool) -> nn.Module: """Constructs and returns the model either using LayerSpec or nn.Sequential.""" if layer_spec: print("Wrapping layers with LayerSpec") diff --git a/orttraining/orttraining/test/python/orttraining_test_ortmodule_api.py b/orttraining/orttraining/test/python/orttraining_test_ortmodule_api.py index 0ab441ac936fe..661d4af043c40 100644 --- a/orttraining/orttraining/test/python/orttraining_test_ortmodule_api.py +++ b/orttraining/orttraining/test/python/orttraining_test_ortmodule_api.py @@ -3320,8 +3320,8 @@ def test_parameters(): N, D_in, H, D_out = 64, 784, 500, 10 # noqa: F841, N806 pt_model = NeuralNetSinglePositionalArgument(D_in, H, D_out).to(device) ort_model = ORTModule(copy.deepcopy(pt_model)) - parameters_pt = [param for param in pt_model.parameters()] - parameters_ort = [param for param in ort_model.parameters()] + parameters_pt = list(pt_model.parameters()) + parameters_ort = list(ort_model.parameters()) assert len(parameters_pt) > 0 assert len(parameters_pt) == len(parameters_ort) @@ -3351,8 +3351,8 @@ def test_buffers(): pt_model = NeuralNetSinglePositionalArgument(D_in, H, D_out).to(device) pt_model.register_buffer("sample_buffer_pt", torch.tensor(torch.randn(N, D_in, device=device))) ort_model = ORTModule(copy.deepcopy(pt_model)) - buffers_pt = [buffer for buffer in pt_model.buffers()] - buffers_ort = [buffer for buffer in ort_model.buffers()] + buffers_pt = list(pt_model.buffers()) + buffers_ort = list(ort_model.buffers()) assert len(buffers_pt) > 0 assert len(buffers_pt) == len(buffers_ort) @@ -3360,7 +3360,7 @@ def test_buffers(): x = torch.tensor(torch.randn(N, D_in, device=device)) ort_model.register_buffer("sample_buffer_ort", x) - buffers_ort = [buffer for buffer in ort_model.buffers()] + buffers_ort = list(ort_model.buffers()) assert len(buffers_ort) == 2 assert torch.equal(buffers_ort[1], x) @@ -4166,7 +4166,7 @@ def forward( out_ort = ort_model(*y) assert len(out_pt) == len(out_ort) - for x, y in zip(out_pt, out_ort): + for x, y in zip(out_pt, out_ort, strict=False): _test_helpers.assert_values_are_close(x, y) @@ -4257,7 +4257,7 @@ def test_hf_save_pretrained(): ).to(device) model2 = ORTModule(model2) - for p1, p2 in zip(model1.parameters(), model2.parameters()): + for p1, p2 in zip(model1.parameters(), model2.parameters(), strict=False): assert p1.data.ne(p2.data).sum() == 0 @@ -5123,7 +5123,7 @@ def run_optim_step(optimizer): pt_loss = run_step(pt_model, x1) ort_loss = run_step(ort_model, x2) - for pt_param, ort_param in zip(pt_model.parameters(), ort_model.parameters()): + for pt_param, ort_param in zip(pt_model.parameters(), ort_model.parameters(), strict=False): ort_param.grad = copy.deepcopy(pt_param.grad) _test_helpers.assert_values_are_close(pt_loss, ort_loss) @@ -5133,7 +5133,7 @@ def run_optim_step(optimizer): run_optim_step(transformers_adamw_optimizer) run_optim_step(ort_fused_adam_optimizer) - for 
pt_param, ort_param in zip(pt_model.parameters(), ort_model.parameters()): + for pt_param, ort_param in zip(pt_model.parameters(), ort_model.parameters(), strict=False): _test_helpers.assert_values_are_close(pt_param, ort_param, atol=1e-4, rtol=1e-5) @@ -5173,7 +5173,7 @@ def run_optim_step(optimizer): pt_loss = run_step(pt_model, x1) ort_loss = run_step(ort_model, x2) - for pt_param, ort_param in zip(pt_model.parameters(), ort_model.parameters()): + for pt_param, ort_param in zip(pt_model.parameters(), ort_model.parameters(), strict=False): ort_param.grad = copy.deepcopy(pt_param.grad) _test_helpers.assert_values_are_close(pt_loss, ort_loss, atol=1e-4, rtol=1e-5) @@ -5185,7 +5185,7 @@ def run_optim_step(optimizer): run_optim_step(adamw_optimizer) run_optim_step(ort_fused_adam_optimizer) - for pt_param, ort_param in zip(pt_model.parameters(), ort_model.parameters()): + for pt_param, ort_param in zip(pt_model.parameters(), ort_model.parameters(), strict=False): _test_helpers.assert_values_are_close(pt_param, ort_param, atol=1e-4, rtol=1e-5) @@ -5506,7 +5506,7 @@ def random_state_equal(a, b): assert type(a) is type(b) if isinstance(a, tuple): assert len(a) == len(b) - return all([random_state_equal(a_i, b_i) for a_i, b_i in zip(a, b)]) + return all(random_state_equal(a_i, b_i) for a_i, b_i in zip(a, b, strict=False)) if isinstance(a, np.ndarray): return np.array_equal(a, b) if isinstance(a, torch.Tensor): @@ -6170,7 +6170,7 @@ def generate_inputs(batch_size, max_seq_length, vocab_size): run_optim_step(pt_optimizer) run_optim_step(ort_optimizer) - for pt_param, ort_param in zip(pt_model.parameters(), ort_model.parameters()): + for pt_param, ort_param in zip(pt_model.parameters(), ort_model.parameters(), strict=False): _test_helpers.assert_values_are_close(pt_param.grad, ort_param.grad, atol=1e-4, rtol=1e-5) if os.getenv("ORTMODULE_ROCM_TEST", "0") == "1": @@ -6394,7 +6394,7 @@ def run_step(model, x): pt_grads = run_step(pt_model, pt_x) ort_grads = run_step(ort_model, ort_x) - for pt_grad, ort_grad in zip(pt_grads, ort_grads): + for pt_grad, ort_grad in zip(pt_grads, ort_grads, strict=False): if use_fp16: assert torch.allclose(pt_grad, ort_grad, atol=1e-3, rtol=1e-3) else: @@ -6443,7 +6443,7 @@ def run_step(model, x): pt_grads = run_step(pt_model, pt_x) ort_grads = run_step(ort_model, ort_x) - for pt_grad, ort_grad in zip(pt_grads, ort_grads): + for pt_grad, ort_grad in zip(pt_grads, ort_grads, strict=False): assert torch.allclose(pt_grad, ort_grad) if conv_algo_search is not None: @@ -6489,7 +6489,7 @@ def run_step(model, x): pt_grads = run_step(pt_model, pt_x) ort_grads = run_step(ort_model, ort_x) - for pt_grad, ort_grad in zip(pt_grads, ort_grads): + for pt_grad, ort_grad in zip(pt_grads, ort_grads, strict=False): assert torch.allclose(pt_grad, ort_grad, atol=1e-2, rtol=1e-2) if conv_algo_search is not None: @@ -6562,7 +6562,8 @@ def test_bert_memory_inspection(caplog): os.environ["ORTMODULE_PRINT_MEMORY_STATS"] = "1" pt_model.eval() # Put it in evaluate mode by intention, in case some initialization in ORTModule use the module.is_training for its checks by mistake. ort_model = ORTModule( - copy.deepcopy(pt_model), DebugOptions(log_level=LogLevel.INFO) # The logged memory info is in INFO level. + copy.deepcopy(pt_model), + DebugOptions(log_level=LogLevel.INFO), # The logged memory info is in INFO level. 
) def run_step(model, x, y, z): @@ -6634,7 +6635,7 @@ def run_step(model, attn_weight): ) onnx_model = onnx.load(path) - onnx_nodes = [n for n in onnx_model.graph.node] + onnx_nodes = list(onnx_model.graph.node) assert onnx_nodes[0].op_type == "Cast" to_attr = onnx_nodes[0].attribute[0] @@ -6776,11 +6777,9 @@ def forward(self, x): def test_layerwise_recompute_pythonop_deterministic(): - original_val = os.environ.get("ORTMODULE_MEMORY_OPT_LEVEL", None) class DropoutFunction(torch.autograd.Function): - @staticmethod def forward(ctx, x): return torch.nn.functional.dropout(x, p=0.5, training=True) @@ -6918,7 +6917,7 @@ def generate_inputs(batch_size, max_seq_length, vocab_size): ort_model2 = ORTModule(copy.deepcopy(pt_model), DebugOptions(save_onnx=True, onnx_prefix="recompute")) ort_prediction2 = run_step(ort_model2, ort_input, ort_mask, ort_target) - for ort_param1, ort_param2 in zip(ort_model1.parameters(), ort_model2.parameters()): + for ort_param1, ort_param2 in zip(ort_model1.parameters(), ort_model2.parameters(), strict=False): _test_helpers.assert_values_are_close(ort_param1.grad, ort_param2.grad, atol=1e-4, rtol=1e-5) if os.getenv("ORTMODULE_ROCM_TEST", "0") == "1": diff --git a/orttraining/orttraining/test/python/orttraining_test_ortmodule_autograd.py b/orttraining/orttraining/test/python/orttraining_test_ortmodule_autograd.py index 95012aa0507a5..d8707a25c4129 100644 --- a/orttraining/orttraining/test/python/orttraining_test_ortmodule_autograd.py +++ b/orttraining/orttraining/test/python/orttraining_test_ortmodule_autograd.py @@ -7,7 +7,6 @@ import copy import os -from typing import Tuple import onnx import pytest @@ -264,13 +263,13 @@ def forward( ctx, input, alpha: float, - beta: Tuple[float, float], + beta: tuple[float, float], gamma: float, delta: bool, - epsilon: Tuple[bool, bool], + epsilon: tuple[bool, bool], zeta: int, - eta: Tuple[int, int], - theta: Tuple[float, float], + eta: tuple[int, int], + theta: tuple[float, float], ): ctx.save_for_backward(input) ctx.alpha = alpha @@ -296,7 +295,7 @@ def backward(ctx, grad_output): assert alpha == alpha_value assert isinstance(alpha, float) - assert all(a == b for a, b in zip(beta, beta_value)) + assert all(a == b for a, b in zip(beta, beta_value, strict=False)) assert all(isinstance(x, float) for x in beta) assert gamma == gamma_value @@ -305,16 +304,16 @@ def backward(ctx, grad_output): assert ctx.delta == delta_value assert isinstance(ctx.delta, bool) - assert all(a == b for a, b in zip(ctx.epsilon, epsilon_value)) + assert all(a == b for a, b in zip(ctx.epsilon, epsilon_value, strict=False)) assert all(isinstance(x, bool) for x in ctx.epsilon) assert ctx.zeta == zeta_value assert isinstance(ctx.zeta, int) - assert all(a == b for a, b in zip(ctx.eta, eta_value)) + assert all(a == b for a, b in zip(ctx.eta, eta_value, strict=False)) assert all(isinstance(x, int) for x in ctx.eta) - assert all(a == b for a, b in zip(ctx.theta, theta_value)) + assert all(a == b for a, b in zip(ctx.theta, theta_value, strict=False)) assert all(isinstance(x, float) for x in ctx.theta) return alpha * beta[0] * beta[1] * gamma * grad_input, None, None, None, None, None, None, None, None @@ -1414,13 +1413,9 @@ def test_pythonop_training_mode(): def check_pythonop_training_mode(model, is_eval_mode): ## make sure the ort's PythonOp's training_mode is correct if is_eval_mode: - onnx_nodes = ( - model._torch_module._execution_manager._inference_manager._graph_transition_manager._exported_model_info.exported_model.graph.node - ) + onnx_nodes = 
model._torch_module._execution_manager._inference_manager._graph_transition_manager._exported_model_info.exported_model.graph.node else: - onnx_nodes = ( - model._torch_module._execution_manager._training_manager._graph_transition_manager._exported_model_info.exported_model.graph.node - ) + onnx_nodes = model._torch_module._execution_manager._training_manager._graph_transition_manager._exported_model_info.exported_model.graph.node found_pythonop = False for node in onnx_nodes: @@ -1642,20 +1637,20 @@ def _find_shape_and_dtype(value_infos): _find_shape_and_dtype(graph.value_info) assert all(s is not None for s in input_shapes), "PythonOp input shape should be found in the optimized_model" - assert ( - all(d is not None for d in input_dtypes) is not None - ), "PythonOp input dtype should be found in the optimized_model" + assert all(d is not None for d in input_dtypes) is not None, ( + "PythonOp input dtype should be found in the optimized_model" + ) assert all(s is not None for s in output_shapes), "PythonOp output shape should be found in the optimized_model" - assert ( - all(d is not None for d in output_dtypes) is not None - ), "PythonOp output dtype should be found in the optimized_model" + assert all(d is not None for d in output_dtypes) is not None, ( + "PythonOp output dtype should be found in the optimized_model" + ) def _compare_shape(shape1, shape2): if len(shape1.dim) != len(shape2.dim): return False - for dim1, dim2 in zip(shape1.dim, shape2.dim): + for dim1, dim2 in zip(shape1.dim, shape2.dim, strict=False): if dim1.HasField("dim_value") and dim1.HasField("dim_value") and dim1.dim_value == dim2.dim_value: continue @@ -1794,7 +1789,7 @@ def _run_step(model, input): _run_step(pt_model, input) _run_step(ort_model, input) - pt_params = {n: p for n, p in pt_model.named_parameters()} + pt_params = dict(pt_model.named_parameters()) for name, param in ort_model.named_parameters(): assert_values_are_close(param, pt_params[name], rtol=1e-04, atol=1e-3) if param.grad is not None: @@ -1805,7 +1800,6 @@ def _run_step(model, input): def test_determistic_pythonop_export(): - class TestFunction(torch.autograd.Function): @staticmethod # bias is an optional argument @@ -1839,9 +1833,7 @@ def forward(self, model_input): ortmodule = ORTModule(TestModel(output_size)).train() _ = ortmodule(torch.randn(output_size, dtype=torch.float)) - onnx_nodes = ( - ortmodule._torch_module._execution_manager._training_manager._graph_transition_manager._exported_model_info.exported_model.graph.node - ) + onnx_nodes = ortmodule._torch_module._execution_manager._training_manager._graph_transition_manager._exported_model_info.exported_model.graph.node found_pythonop = False for node in onnx_nodes: diff --git a/orttraining/orttraining/test/python/orttraining_test_ortmodule_bert_classifier.py b/orttraining/orttraining/test/python/orttraining_test_ortmodule_bert_classifier.py index 41e1e0f5d0d57..0d5825fb3140e 100644 --- a/orttraining/orttraining/test/python/orttraining_test_ortmodule_bert_classifier.py +++ b/orttraining/orttraining/test/python/orttraining_test_ortmodule_bert_classifier.py @@ -12,10 +12,10 @@ import wget from sklearn.model_selection import train_test_split from torch.utils.data import DataLoader, RandomSampler, SequentialSampler, TensorDataset -from transformers import BertConfig # noqa: F401 from transformers import ( AdamW, AutoConfig, + BertConfig, # noqa: F401 BertForSequenceClassification, BertTokenizer, get_linear_schedule_with_warmup, @@ -376,7 +376,7 @@ def main(): # Device (CPU vs CUDA) if 
torch.cuda.is_available() and not args.no_cuda: device = torch.device("cuda") - print("There are %d GPU(s) available." % torch.cuda.device_count()) + print(f"There are {torch.cuda.device_count()} GPU(s) available.") print("We will use the GPU:", torch.cuda.get_device_name(0)) else: print("No GPU available, using the CPU instead.") @@ -429,7 +429,9 @@ def main(): # Create the learning rate scheduler. scheduler = get_linear_schedule_with_warmup( - optimizer, num_warmup_steps=0, num_training_steps=total_steps # Default value in run_glue.py + optimizer, + num_warmup_steps=0, + num_training_steps=total_steps, # Default value in run_glue.py ) # Seed random.seed(args.seed) diff --git a/orttraining/orttraining/test/python/orttraining_test_ortmodule_bert_classifier_autocast.py b/orttraining/orttraining/test/python/orttraining_test_ortmodule_bert_classifier_autocast.py index 801eb58727689..50f411c02a5b5 100644 --- a/orttraining/orttraining/test/python/orttraining_test_ortmodule_bert_classifier_autocast.py +++ b/orttraining/orttraining/test/python/orttraining_test_ortmodule_bert_classifier_autocast.py @@ -12,9 +12,14 @@ import wget from sklearn.model_selection import train_test_split from torch.utils.data import DataLoader, RandomSampler, SequentialSampler, TensorDataset -from transformers import AdamW # noqa: F401 -from transformers import BertConfig # noqa: F401 -from transformers import AutoConfig, BertForSequenceClassification, BertTokenizer, get_linear_schedule_with_warmup +from transformers import ( + AdamW, # noqa: F401 + AutoConfig, + BertConfig, # noqa: F401 + BertForSequenceClassification, + BertTokenizer, + get_linear_schedule_with_warmup, +) import onnxruntime from onnxruntime.training.ortmodule import DebugOptions, ORTModule @@ -376,7 +381,7 @@ def main(): # Device (CPU vs CUDA) if torch.cuda.is_available() and not args.no_cuda: device = torch.device("cuda") - print("There are %d GPU(s) available." % torch.cuda.device_count()) + print(f"There are {torch.cuda.device_count()} GPU(s) available.") print("We will use the GPU:", torch.cuda.get_device_name(0)) else: print("No GPU available, using the CPU instead.") @@ -432,7 +437,9 @@ def main(): # Create the learning rate scheduler. 
scheduler = get_linear_schedule_with_warmup( - optimizer, num_warmup_steps=0, num_training_steps=total_steps # Default value in run_glue.py + optimizer, + num_warmup_steps=0, + num_training_steps=total_steps, # Default value in run_glue.py ) scaler = torch.cuda.amp.GradScaler() diff --git a/orttraining/orttraining/test/python/orttraining_test_ortmodule_deepspeed_pipeline_parallel.py b/orttraining/orttraining/test/python/orttraining_test_ortmodule_deepspeed_pipeline_parallel.py index 46b172a39619b..174edf37756ea 100755 --- a/orttraining/orttraining/test/python/orttraining_test_ortmodule_deepspeed_pipeline_parallel.py +++ b/orttraining/orttraining/test/python/orttraining_test_ortmodule_deepspeed_pipeline_parallel.py @@ -108,7 +108,10 @@ def get_args(): print("Initialize deepspeed") model_engine, optimizer, _, _ = deepspeed.initialize( - args=args, model=model, model_parameters=params, training_data=ds # (x,y)# + args=args, + model=model, + model_parameters=params, + training_data=ds, # (x,y)# ) for step in range(args.steps): diff --git a/orttraining/orttraining/test/python/orttraining_test_ortmodule_onnx_ops.py b/orttraining/orttraining/test/python/orttraining_test_ortmodule_onnx_ops.py index 35e5bae3ea67e..d977d96e82503 100644 --- a/orttraining/orttraining/test/python/orttraining_test_ortmodule_onnx_ops.py +++ b/orttraining/orttraining/test/python/orttraining_test_ortmodule_onnx_ops.py @@ -30,7 +30,7 @@ def assert_gradients_match_and_reset_gradient( pt_named_params = list(pt_model.named_parameters()) self.assertEqual(len(ort_named_params), len(pt_named_params)) - for ort_named_param, pt_named_param in zip(ort_named_params, pt_named_params): + for ort_named_param, pt_named_param in zip(ort_named_params, pt_named_params, strict=False): ort_name, ort_param = ort_named_param pt_name, pt_param = pt_named_param @@ -69,9 +69,7 @@ def run_step(model, x): self.assert_values_are_close(ort_prediction, pt_prediction, **kwargs) self.assert_gradients_match_and_reset_gradient(ort_model, pt_model, **kwargs) - onnx_graph_inf = ( - ort_model._torch_module._execution_manager._training_manager._graph_transition_manager._exported_model_info.exported_model - ) + onnx_graph_inf = ort_model._torch_module._execution_manager._training_manager._graph_transition_manager._exported_model_info.exported_model onnx_graph_train = ort_model._torch_module._execution_manager._training_manager._onnx_models.optimized_model if debug: with open(f"debug_{name}_ortmodule_infer.onnx", "wb") as f: @@ -86,7 +84,7 @@ def run_step(model, x): if op_grad_type is not None: if isinstance(op_grad_type, tuple): text = str(onnx_graph_train) - if all(map(lambda op: (f'op_type: "{op}"') not in text, op_grad_type)): + if all((f'op_type: "{op}"') not in text for op in op_grad_type): raise AssertionError("Operator {} not found in {}.".format(" or ".join(op_grad_type), text)) else: self.assertIn(f'op_type: "{op_grad_type}"', str(onnx_graph_train)) @@ -135,7 +133,7 @@ def forward(self, input1): out = self.fc2(out) return out - return TestGatherElement, "GatherElementsGrad", dict(rtol=1e-04, atol=1e-05) + return TestGatherElement, "GatherElementsGrad", {"rtol": 1e-04, "atol": 1e-05} raise AssertionError(f"Unexpected name={name!r}.") diff --git a/orttraining/orttraining/test/python/orttraining_test_ortmodule_pytorch_ddp.py b/orttraining/orttraining/test/python/orttraining_test_ortmodule_pytorch_ddp.py index 5006b7c30766c..bb0fedb4938a1 100644 --- a/orttraining/orttraining/test/python/orttraining_test_ortmodule_pytorch_ddp.py +++ 
b/orttraining/orttraining/test/python/orttraining_test_ortmodule_pytorch_ddp.py @@ -112,7 +112,7 @@ def demo_checkpoint(rank, world_size, use_ort_module): # 0 saves it. dist.barrier() # configure map_location properly - map_location = {"cuda:%d" % 0: "cuda:%d" % rank} + map_location = {"cuda:0": f"cuda:{rank}"} ddp_model.load_state_dict(torch.load(CHECKPOINT_PATH, map_location=map_location)) optimizer.zero_grad() diff --git a/orttraining/orttraining/test/python/orttraining_test_ortmodule_triton.py b/orttraining/orttraining/test/python/orttraining_test_ortmodule_triton.py index 0c381d70ca4c1..4e0022193909b 100644 --- a/orttraining/orttraining/test/python/orttraining_test_ortmodule_triton.py +++ b/orttraining/orttraining/test/python/orttraining_test_ortmodule_triton.py @@ -99,7 +99,7 @@ def _torch_softmax(input, **kwargs): def _torch_reduce(input, func, **kwargs): rank = len(input.shape) - axes = kwargs.get("axes", [idx for idx in range(rank)]) + axes = kwargs.get("axes", list(range(rank))) keepdims = kwargs.get("keepdims", True) axes = [axis if axis >= 0 else rank + axis for axis in axes] axes.sort(reverse=True) @@ -206,7 +206,7 @@ def _run_op_test(op_type, onnx_dtype, create_model_func, gen_inputs_func, **kwar if isinstance(pt_outputs, tuple): assert isinstance(ort_outputs, tuple) assert len(pt_outputs) == len(ort_outputs) - for pt_output, ort_output in zip(pt_outputs, ort_outputs): + for pt_output, ort_output in zip(pt_outputs, ort_outputs, strict=False): _test_helpers.assert_values_are_close(pt_output, _from_dlpack(ort_output), rtol=rtol, atol=atol) else: _test_helpers.assert_values_are_close(pt_outputs, _from_dlpack(ort_outputs), rtol=rtol, atol=atol) @@ -489,7 +489,7 @@ def test_dropout_op(onnx_dtype, input_shape_and_ratio): def _check_output(x, y, mask, ratio): all_count = 0 masked_count = 0 - for x_value, y_value, mask_value in zip(x, y, mask): + for x_value, y_value, mask_value in zip(x, y, mask, strict=False): if mask_value: assert abs(y_value - x_value / (1.0 - ratio)) < 0.05 else: diff --git a/orttraining/orttraining/test/python/orttraining_test_ortvalue.py b/orttraining/orttraining/test/python/orttraining_test_ortvalue.py index 317efa0061865..d65898fdaedd7 100644 --- a/orttraining/orttraining/test/python/orttraining_test_ortvalue.py +++ b/orttraining/orttraining/test/python/orttraining_test_ortvalue.py @@ -104,7 +104,7 @@ def testOrtValueVector_float32(self): vect.push_back(ortvalue._ortvalue) self.assertEqual(len(vect.bool_tensor_indices()), 0) self.assertEqual(len(vect), 2) - for i, (ov, ar) in enumerate(zip(vect, narrays)): + for i, (ov, ar) in enumerate(zip(vect, narrays, strict=False)): ovar = ov.numpy() assert_almost_equal(ar, ovar) self.assertEqual(ov.element_type(), vect.element_type_at(i)) @@ -120,7 +120,7 @@ def testOrtValueVector_bool(self): vect.push_back(ortvalue._ortvalue) self.assertEqual(vect.bool_tensor_indices(), [0, 1]) self.assertEqual(len(vect), 2) - for ov, ar in zip(vect, narrays): + for ov, ar in zip(vect, narrays, strict=False): ovar = ov.numpy() assert_almost_equal(ar, ovar) @@ -143,16 +143,16 @@ def OrtValueVectorDlPackOrtValue(self, my_to_tensor, tensor_type, device, dtype= self.assertIn("PyCapsule", str(type(converted_values[0]))) converted_values = [C_OrtValue.from_dlpack(o, False) for o in converted_values] else: - assert all(map(lambda v: isinstance(v, tensor_type), converted_values)) + assert all(isinstance(v, tensor_type) for v in converted_values) # We make sure the function does not leak any python object. 
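The refcount comparison that follows relies on `sys.getrefcount`, which always reports one more than the number of live references because its own argument is a temporary reference; counts equal to those of freshly created dummy objects indicate the conversion held no hidden references. A tiny self-contained illustration of the mechanism (the names here are arbitrary, not from the test):

    # Sketch of refcount-based leak checking with sys.getrefcount (CPython).
    import sys

    probe = object()
    before = sys.getrefcount(probe)   # includes getrefcount's own temporary reference
    alias = probe                     # taking another reference bumps the count by one
    assert sys.getrefcount(probe) == before + 1
    del alias                         # dropping the reference restores the count
    assert sys.getrefcount(probe) == before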
cf = [sys.getrefcount(o) for o in converted_values] - dummy = [np.array([[0, 1]]), dict(a=3)] + dummy = [np.array([[0, 1]]), {"a": 3}] cf2 = [sys.getrefcount(o) for o in dummy] self.assertEqual(cf, cf2) # it should be [3, 3] ptr2 = [] - for av1, v2 in zip(narrays, converted_values): + for av1, v2 in zip(narrays, converted_values, strict=False): ptr2.append(v2.data_ptr()) if hasattr(v2, "cpu"): av2 = v2.cpu().numpy() @@ -285,7 +285,7 @@ def _ortvalues_to_torch_tensor_ortvaluevector(self, device, tensor_type, new_imp for t in tensors: assert isinstance(t, torch.Tensor) self.assertEqual(ptr, [t.data_ptr() for t in tensors]) - assert all(map(lambda v: isinstance(v, tensor_type), tensors)) + assert all(isinstance(v, tensor_type) for v in tensors) def test_ortvalues_to_torch_tensor_ortvaluevector_cpu_new(self): device = torch.device("cpu") @@ -336,7 +336,7 @@ def _ortvalues_to_torch_tensor_list(self, device, tensor_type, new_impl): tensors = _ortvalues_to_torch_tensor(vect, device) self.assertEqual(len(tensors), len(vect)) self.assertEqual(ptr, [t.data_ptr() for t in tensors]) - assert all(map(lambda v: isinstance(v, tensor_type), tensors)) + assert all(isinstance(v, tensor_type) for v in tensors) def test_ortvalues_to_torch_tensor_list_cpu_new(self): device = torch.device("cpu") diff --git a/orttraining/orttraining/test/python/orttraining_test_sampler.py b/orttraining/orttraining/test/python/orttraining_test_sampler.py index 68f9ac5052134..0a6b54d972a46 100644 --- a/orttraining/orttraining/test/python/orttraining_test_sampler.py +++ b/orttraining/orttraining/test/python/orttraining_test_sampler.py @@ -54,7 +54,7 @@ def test_load_balancing_data_sampler_shuffles_and_balances_load(): random.shuffle(complexities) samples = [torch.FloatTensor([val]) for val in range(100)] - samples_and_complexities = list(zip(samples, complexities)) + samples_and_complexities = list(zip(samples, complexities, strict=False)) dataset = MyDataset(samples_and_complexities) def complexity_fn(sample): @@ -67,7 +67,7 @@ def complexity_fn(sample): dataset, complexity_fn=complexity_fn, world_size=2, rank=1, shuffle=True ) - for index0, index1 in zip(data_sampler0, data_sampler1): + for index0, index1 in zip(data_sampler0, data_sampler1, strict=False): assert samples_and_complexities[index0][1] == samples_and_complexities[index1][1] @@ -90,7 +90,7 @@ def complexity_fn(sample): dataset, complexity_fn=complexity_fn, world_size=1, rank=0, shuffle=False, group_size=8 ) - for index, sorted_sample in zip(data_sampler, samples_and_complexities_sorted): + for index, sorted_sample in zip(data_sampler, samples_and_complexities_sorted, strict=False): assert samples_and_complexities[index][1] == sorted_sample[1] @@ -127,7 +127,9 @@ def complexity_fn(sample): dataset, complexity_fn=complexity_fn, world_size=1, rank=0, shuffle=True, group_size=8 ) - for index, sorted_and_shuffled_sample in zip(data_sampler, samples_and_complexities_sorted_and_shuffled): + for index, sorted_and_shuffled_sample in zip( + data_sampler, samples_and_complexities_sorted_and_shuffled, strict=False + ): assert samples_and_complexities[index][1] == sorted_and_shuffled_sample[1] diff --git a/orttraining/orttraining/test/python/orttraining_test_utilities.py b/orttraining/orttraining/test/python/orttraining_test_utilities.py index faa04f327be7f..06e38c3ce62a0 100644 --- a/orttraining/orttraining/test/python/orttraining_test_utilities.py +++ b/orttraining/orttraining/test/python/orttraining_test_utilities.py @@ -256,7 +256,10 @@ def _recursive_compare(real, expected): 
if flag == 0: out, schema = extract_data_and_schema(raw_data) - assert all([torch.allclose(o, d) if isinstance(o, torch.Tensor) else o == d for o, d in zip(out, flatten_data)]) + assert all( + torch.allclose(o, d) if isinstance(o, torch.Tensor) else o == d + for o, d in zip(out, flatten_data, strict=False) + ) if not isinstance(raw_data, torch.Tensor): assert type(schema) is type(raw_data) @@ -274,10 +277,8 @@ def _recursive_compare(real, expected): assert raw_data == schema else: assert all( - [ - torch.allclose(o, d) if isinstance(o, torch.Tensor) else o == d - for o, d in zip(out, flatten_data_constant_as_tensor) - ] + torch.allclose(o, d) if isinstance(o, torch.Tensor) else o == d + for o, d in zip(out, flatten_data_constant_as_tensor, strict=False) ) elif flag == 1: diff --git a/orttraining/orttraining/test/python/qat_poc_example/train.py b/orttraining/orttraining/test/python/qat_poc_example/train.py index a25c071c58a48..45c0aa77ae909 100644 --- a/orttraining/orttraining/test/python/qat_poc_example/train.py +++ b/orttraining/orttraining/test/python/qat_poc_example/train.py @@ -68,8 +68,8 @@ def train_model(qat_train_model, qat_eval_model, qat_optimizer_model, qat_checkp # Training loop epochs = 5 for epoch in range(epochs): - logging.info(f"Starting epoch: {epoch+1}") + logging.info(f"Starting epoch: {epoch + 1}") training_loss = _train_epoch(model, optimizer, train_loader) eval_loss = _eval(model, test_loader) - logging.info(f"End of epoch: {epoch+1}, training loss: {training_loss:.4f}, eval loss: {eval_loss:.4f}") + logging.info(f"End of epoch: {epoch + 1}, training loss: {training_loss:.4f}, eval loss: {eval_loss:.4f}") diff --git a/orttraining/orttraining/test/training_ops/cuda/softmax_test.cc b/orttraining/orttraining/test/training_ops/cuda/softmax_test.cc index ad6ee1e0950e9..9ced022aab850 100644 --- a/orttraining/orttraining/test/training_ops/cuda/softmax_test.cc +++ b/orttraining/orttraining/test/training_ops/cuda/softmax_test.cc @@ -215,14 +215,22 @@ TEST(CudaKernelTest, SoftmaxGrad_LargeTensor_LastAxis_Float16) { std::vector dY_dims{8, 16, 2048}; std::vector Y_dims{8, 16, 2048}; std::vector dX_dims{8, 16, 2048}; +#if USE_ROCM + TestSoftmaxGrad(dY_dims, Y_dims, dX_dims, 2, false, 1.5e-2, 1.5e-2); +#else TestSoftmaxGrad(dY_dims, Y_dims, dX_dims, 2, false, 1e-3, 1e-3); +#endif } TEST(CudaKernelTest, SoftmaxGrad_LargeTensor_LastAxis_Float16_NoPowerOfTwo) { std::vector dY_dims{8, 16, 1500}; std::vector Y_dims{8, 16, 1500}; std::vector dX_dims{8, 16, 1500}; +#if USE_ROCM + TestSoftmaxGrad(dY_dims, Y_dims, dX_dims, 2, false, 1.7e-2, 1.7e-2); +#else TestSoftmaxGrad(dY_dims, Y_dims, dX_dims, 2, false, 1e-3, 1e-3); +#endif } // large tensor to check cuda DNN softmax backward @@ -238,16 +246,26 @@ TEST(CudaKernelTest, SoftmaxGrad_LargeTensor_AllAxis_Float16) { std::vector dY_dims{8, 16, 512}; std::vector Y_dims{8, 16, 512}; std::vector dX_dims{8, 16, 512}; +#if USE_ROCM + TestSoftmaxGrad(dY_dims, Y_dims, dX_dims, 0, false, 1.5e-2, 1.5e-2); + TestSoftmaxGrad(dY_dims, Y_dims, dX_dims, 1, false, 1.5e-2, 1.5e-2); +#else TestSoftmaxGrad(dY_dims, Y_dims, dX_dims, 0, false, 1e-3, 1e-3); TestSoftmaxGrad(dY_dims, Y_dims, dX_dims, 1, false, 1e-3, 1e-3); +#endif } TEST(CudaKernelTest, SoftmaxGrad_LargeTensor_AllAxis_Float16_NoPowerOfTwo) { std::vector dY_dims{8, 16, 1500}; std::vector Y_dims{8, 16, 1500}; std::vector dX_dims{8, 16, 1500}; +#if USE_ROCM + TestSoftmaxGrad(dY_dims, Y_dims, dX_dims, 0, false, 2.5e-2, 2.5e-2); + TestSoftmaxGrad(dY_dims, Y_dims, dX_dims, 1, false, 2.5e-2, 2.5e-2); 
+#else TestSoftmaxGrad(dY_dims, Y_dims, dX_dims, 0, false, 1e-3, 1e-3); TestSoftmaxGrad(dY_dims, Y_dims, dX_dims, 1, false, 1e-3, 1e-3); +#endif } TEST(CudaKernelTest, LogSoftmaxGrad_SmallTensor_LastAxis) { @@ -276,14 +294,23 @@ TEST(CudaKernelTest, LogSoftmaxGrad_LargeTensor_LastAxis_Float16) { std::vector dY_dims{8, 16, 2048}; std::vector Y_dims{8, 16, 2048}; std::vector dX_dims{8, 16, 2048}; +#if USE_ROCM + TestSoftmaxGrad(dY_dims, Y_dims, dX_dims, 2, true, 3.5e-2, 3.5e-2); +#else TestSoftmaxGrad(dY_dims, Y_dims, dX_dims, 2, true, 1e-3, 1e-3); +#endif } TEST(CudaKernelTest, LogSoftmaxGrad_LargeTensor_LastAxis_Float16_NoPowerOfTwo) { std::vector dY_dims{8, 16, 1500}; std::vector Y_dims{8, 16, 1500}; std::vector dX_dims{8, 16, 1500}; +#if USE_ROCM + // FIXME: Excessive numerical errors + TestSoftmaxGrad(dY_dims, Y_dims, dX_dims, 2, true, 1.0, 5e-2); +#else TestSoftmaxGrad(dY_dims, Y_dims, dX_dims, 2, true, 1e-3, 1e-3); +#endif } TEST(CudaKernelTest, LogSoftmaxGrad_LargeTensor_AllAxis) { @@ -298,16 +325,26 @@ TEST(CudaKernelTest, LogSoftmaxGrad_LargeTensor_AllAxis_Float16) { std::vector dY_dims{8, 16, 512}; std::vector Y_dims{8, 16, 512}; std::vector dX_dims{8, 16, 512}; +#if USE_ROCM + TestSoftmaxGrad(dY_dims, Y_dims, dX_dims, 0, true, 1.5e-2, 1.5e-2); + TestSoftmaxGrad(dY_dims, Y_dims, dX_dims, 1, true, 1.5e-2, 1.5e-2); +#else TestSoftmaxGrad(dY_dims, Y_dims, dX_dims, 0, true, 1e-3, 1e-3); TestSoftmaxGrad(dY_dims, Y_dims, dX_dims, 1, true, 1e-3, 1e-3); +#endif } TEST(CudaKernelTest, LogSoftmaxGrad_LargeTensor_AllAxis_Float16_NoPowerOfTwo) { std::vector dY_dims{8, 16, 1500}; std::vector Y_dims{8, 16, 1500}; std::vector dX_dims{8, 16, 1500}; +#if USE_ROCM + TestSoftmaxGrad(dY_dims, Y_dims, dX_dims, 0, true, 4.5e-2, 4.5e-2); + TestSoftmaxGrad(dY_dims, Y_dims, dX_dims, 1, true, 4.5e-2, 4.5e-2); +#else TestSoftmaxGrad(dY_dims, Y_dims, dX_dims, 0, true, 1e-3, 1e-3); TestSoftmaxGrad(dY_dims, Y_dims, dX_dims, 1, true, 1e-3, 1e-3); +#endif } static void TestSoftmaxGrad_13(const std::vector& dY_dims, diff --git a/orttraining/tools/ci_test/compare_results.py b/orttraining/tools/ci_test/compare_results.py index 0ab0a1246a421..957b609f53726 100644 --- a/orttraining/tools/ci_test/compare_results.py +++ b/orttraining/tools/ci_test/compare_results.py @@ -31,7 +31,7 @@ def _printf_stderr(fmt, *args): def _read_results_file(results_path): with open(results_path) as results_file: csv_reader = csv.DictReader(results_file) - return [row for row in csv_reader] + return list(csv_reader) def _compare_results(expected_results, actual_results, field_comparisons): @@ -43,7 +43,7 @@ def _compare_results(expected_results, actual_results, field_comparisons): return False mismatch_detected = False - for row_idx, (expected_row, actual_row) in enumerate(zip(expected_results, actual_results)): + for row_idx, (expected_row, actual_row) in enumerate(zip(expected_results, actual_results, strict=False)): for field_name, comparison in field_comparisons.items(): actual, expected = actual_row[field_name], expected_row[field_name] if not comparison.fn(actual, expected): diff --git a/orttraining/tools/ci_test/run_batch_size_test.py b/orttraining/tools/ci_test/run_batch_size_test.py index 348d490678e9a..a1bf3fd71ca82 100755 --- a/orttraining/tools/ci_test/run_batch_size_test.py +++ b/orttraining/tools/ci_test/run_batch_size_test.py @@ -106,7 +106,7 @@ def main(): ] if config.enable_mixed_precision: - cmds.append("--use_mixed_precision"), + (cmds.append("--use_mixed_precision"),) subprocess.run(cmds, timeout=120).check_returncode() 
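The (cmds.append("--use_mixed_precision"),) spelling above is Ruff making a pre-existing stray trailing comma explicit rather than fixing it: the statement was already a one-element tuple expression, so parenthesizing keeps behavior identical. A small sketch of that pitfall, plus the subprocess check the PLW1510 suppressions refer to; the echo command is illustrative and assumes a POSIX system:

import subprocess

cmds = ["echo", "hello"]
result = (cmds.append("--use_mixed_precision"),)  # append still runs; the tuple is discarded
assert result == (None,) and cmds[-1] == "--use_mixed_precision"

# PLW1510 wants an explicit `check`; these two calls fail equally loudly
# on a nonzero exit code.
subprocess.run(cmds, check=True)
subprocess.run(cmds).check_returncode()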
# noqa: PLW1510 diff --git a/orttraining/tools/ci_test/run_bert_perf_test.py b/orttraining/tools/ci_test/run_bert_perf_test.py index 13d5e9f140958..c848621c8845a 100644 --- a/orttraining/tools/ci_test/run_bert_perf_test.py +++ b/orttraining/tools/ci_test/run_bert_perf_test.py @@ -94,8 +94,8 @@ def main(): ] if c.use_mixed_precision: - cmds.append("--use_mixed_precision"), - cmds.append("--allreduce_in_fp16"), + (cmds.append("--use_mixed_precision"),) + (cmds.append("--allreduce_in_fp16"),) subprocess.run(cmds).check_returncode() # noqa: PLW1510 if c.expected_perf > 0.0: diff --git a/orttraining/tools/ci_test/run_gpt2_perf_test.py b/orttraining/tools/ci_test/run_gpt2_perf_test.py index 18e59d275b6b5..1df71f02b7e6a 100644 --- a/orttraining/tools/ci_test/run_gpt2_perf_test.py +++ b/orttraining/tools/ci_test/run_gpt2_perf_test.py @@ -60,7 +60,7 @@ def main(): ] if c.use_mixed_precision: - cmds.append("--use_mixed_precision"), + (cmds.append("--use_mixed_precision"),) subprocess.run(cmds).check_returncode() # noqa: PLW1510 diff --git a/orttraining/tools/scripts/gpt2_model_transform.py b/orttraining/tools/scripts/gpt2_model_transform.py index 50bfda4b407af..d5e6d6db44e83 100644 --- a/orttraining/tools/scripts/gpt2_model_transform.py +++ b/orttraining/tools/scripts/gpt2_model_transform.py @@ -18,7 +18,7 @@ def add_name(model): for i, node in enumerate(model.graph.node): - node.name = "%s_%d" % (node.op_type, i) + node.name = f"{node.op_type}_{i}" def find_input_node(model, arg): @@ -139,7 +139,7 @@ def process_concat(model): # insert new shape to reshape for index, reshape_node_index in enumerate(new_nodes): shape_tensor = numpy_helper.from_array(np.asarray(new_nodes[reshape_node_index], dtype=np.int64)) - const_node = add_const(model, "concat_shape_node_%d" % index, "concat_shape_%d" % index, shape_tensor) + const_node = add_const(model, f"concat_shape_node_{index}", f"concat_shape_{index}", shape_tensor) reshape_node = model.graph.node[reshape_node_index] reshape_node.input[1] = const_node.output[0] # delete nodes @@ -227,13 +227,13 @@ def process_dropout(model): if node.op_type == "Dropout": new_dropout = model.graph.node.add() new_dropout.op_type = "TrainableDropout" - new_dropout.name = "TrainableDropout_%d" % index + new_dropout.name = f"TrainableDropout_{index}" # make ratio node ratio = np.asarray([node.attribute[0].f], dtype=np.float32) print(ratio.shape) ratio_value = numpy_helper.from_array(ratio) ratio_node = add_const( - model, "dropout_node_ratio_%d" % index, "dropout_node_ratio_%d" % index, t_value=ratio_value + model, f"dropout_node_ratio_{index}", f"dropout_node_ratio_{index}", t_value=ratio_value ) print(ratio_node) new_dropout.input.extend([node.input[0], ratio_node.output[0]]) diff --git a/orttraining/tools/scripts/model_transform.py b/orttraining/tools/scripts/model_transform.py index e87429d10bf88..6596a4ec6a3c5 100644 --- a/orttraining/tools/scripts/model_transform.py +++ b/orttraining/tools/scripts/model_transform.py @@ -18,7 +18,7 @@ def add_name(model): for i, node in enumerate(model.graph.node): - node.name = "%s_%d" % (node.op_type, i) + node.name = f"{node.op_type}_{i}" def find_input_node(model, arg): @@ -120,7 +120,7 @@ def process_concat(model): # insert new shape to reshape for index, reshape_node_index in enumerate(new_nodes): shape_tensor = numpy_helper.from_array(np.asarray(new_nodes[reshape_node_index], dtype=np.int64)) - const_node = add_const(model, "concat_shape_node_%d" % index, "concat_shape_%d" % index, shape_tensor) + const_node = add_const(model, 
f"concat_shape_node_{index}", f"concat_shape_{index}", shape_tensor) reshape_node = model.graph.node[reshape_node_index] reshape_node.input[1] = const_node.output[0] # delete nodes @@ -251,13 +251,13 @@ def process_dropout(model): if node.op_type == "Dropout": new_dropout = model.graph.node.add() new_dropout.op_type = "TrainableDropout" - new_dropout.name = "TrainableDropout_%d" % index + new_dropout.name = f"TrainableDropout_{index}" # make ratio node ratio = np.asarray([node.attribute[0].f], dtype=np.float32) print(ratio.shape) ratio_value = numpy_helper.from_array(ratio) ratio_node = add_const( - model, "dropout_node_ratio_%d" % index, "dropout_node_ratio_%d" % index, t_value=ratio_value + model, f"dropout_node_ratio_{index}", f"dropout_node_ratio_{index}", t_value=ratio_value ) print(ratio_node) new_dropout.input.extend([node.input[0], ratio_node.output[0]]) diff --git a/orttraining/tools/scripts/nv_run_pretraining.py b/orttraining/tools/scripts/nv_run_pretraining.py index 8c57101f72ddb..565f5af84d4fa 100644 --- a/orttraining/tools/scripts/nv_run_pretraining.py +++ b/orttraining/tools/scripts/nv_run_pretraining.py @@ -14,7 +14,6 @@ # limitations under the License. """BERT finetuning runner.""" - import argparse # ================== @@ -337,7 +336,7 @@ def prepare_model_and_optimizer(args, device): optimizer._lazy_init_maybe_master_weights() optimizer._amp_stash.lazy_init_called = True optimizer.load_state_dict(checkpoint["optimizer"]) - for param, saved_param in zip(amp.master_params(optimizer), checkpoint["master params"]): + for param, saved_param in zip(amp.master_params(optimizer), checkpoint["master params"], strict=False): param.data.copy_(saved_param.data) if args.local_rank != -1: diff --git a/orttraining/tools/scripts/opset12_model_transform.py b/orttraining/tools/scripts/opset12_model_transform.py index 790bdc34e1ff7..915ccf40a0242 100644 --- a/orttraining/tools/scripts/opset12_model_transform.py +++ b/orttraining/tools/scripts/opset12_model_transform.py @@ -68,7 +68,7 @@ def process_trainabledropout(model): if node.op_type == "TrainableDropout": new_dropout = model.graph.node.add() new_dropout.op_type = "Dropout" - new_dropout.name = "Dropout_%d" % index + new_dropout.name = f"Dropout_{index}" # add seed attribute attr = new_dropout.attribute.add() attr.name = "seed" @@ -83,14 +83,14 @@ def process_trainabledropout(model): ratio_scalar = ratio_data.astype(np.float32).reshape(()) ratio_value = numpy_helper.from_array(ratio_scalar, "ratio") new_ratio_node = add_const( - model, "dropout_ratio_node_%d" % index, "dropout_ratio_%d" % index, t_value=ratio_value + model, f"dropout_ratio_node_{index}", f"dropout_ratio_{index}", t_value=ratio_value ) index += 1 # add training_mode output mode_scalar = np.asarray([True]).astype(bool).reshape(()) mode_value = numpy_helper.from_array(mode_scalar, "training_mode") training_mode_node = add_const( - model, "dropout_training_mode_node_%d" % index, "dropout_training_mode_%d" % index, t_value=mode_value + model, f"dropout_training_mode_node_{index}", f"dropout_training_mode_{index}", t_value=mode_value ) index += 1 diff --git a/orttraining/tools/scripts/watch_experiment.py b/orttraining/tools/scripts/watch_experiment.py index d2255b63c66b5..19029b9559f24 100644 --- a/orttraining/tools/scripts/watch_experiment.py +++ b/orttraining/tools/scripts/watch_experiment.py @@ -37,7 +37,7 @@ experiment = Experiment(workspace=ws, name=args.experiment) # Find the Run -runs = [r for r in experiment.get_runs()] +runs = list(experiment.get_runs()) if 
len(runs) == 0: print(f"No runs found in Experiment '{args.experiment}'") diff --git a/pyproject.toml b/pyproject.toml index 40e6eb96dff94..f95fb0ff955a4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,58 +1,24 @@ -[tool.black] -line-length = 120 -# NOTE: Do not extend the exclude list. Edit .lintrunner.toml instead -extend-exclude = "cmake|onnxruntime/core/flatbuffers/" -# NOTE: use the minimum supported python version as target-version -target-version = ["py310"] - -[tool.isort] -# NOTE: Do not extend the exclude list. Edit .lintrunner.toml instead -profile = "black" -line_length = 120 -extend_skip_glob = [ -    "cmake/*", -    "orttraining/*", -    "onnxruntime/core/flatbuffers/*", -] - [tool.pydocstyle] convention = "google" -[tool.pylint.BASIC] -good-names = [ -    "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", -    "p", "q", "r", "s", "t", "u", "v", "w", "ex", "Run", "_", "x", "y", "z" -] - -[tool.pylint.messages_control] -disable = [ -    "format", -    "line-too-long", -    "import-error", -    "no-name-in-module", -    "no-member", -    "too-many-arguments", -    "too-many-locals", -    "too-few-public-methods", -    "missing-docstring", -    "fixme", -] - [tool.pyright] exclude = ["onnxruntime/core/flatbuffers/*"] reportMissingImports = false  [tool.ruff] # NOTE: Do not create an exclude list. Edit .lintrunner.toml instead -target-version = "py38" +target-version = "py310" +line-length = 120  [tool.ruff.lint] select = [     "B", # flake8-bugbear +    "C4", # flake8-comprehensions     "E", # pycodestyle     "F", # Pyflakes     "FURB", # refurb     "G", # flake8-logging-format +    "I", # isort     "ISC", # flake8-implicit-str-concat     "N", # pep8-naming     "NPY", # numpy @@ -74,6 +40,7 @@ select = [ # Always include a comment to explain why.  ignore = [     "B028", # FIXME: Add stacklevel to warnings +    "C408", # Sometimes it is preferable when we construct kwargs     "E501", # Line length controlled by black     "G004", # FIXME: Enable when the rule can be autofixed     "N803", # Argument casing @@ -90,11 +57,7 @@ ignore = [     "SIM108", # We don't encourage ternary operators     "SIM114", # Don't combine if branches for debuggability     "SIM116", # Don't use dict lookup to replace if-else -] -ignore-init-module-imports = true -unfixable = [ -    "F401", # Unused imports -    "SIM112", # Use upper case for env vars +    "UP038", # Using X | Y in isinstance checks is a little aggressive ]  [tool.ruff.lint.per-file-ignores] diff --git a/requirements-lintrunner.txt b/requirements-lintrunner.txt index 72d9ce72ea7cb..2ca562e5f5c2c 100644 --- a/requirements-lintrunner.txt +++ b/requirements-lintrunner.txt @@ -3,9 +3,6 @@ lintrunner==0.12.5 lintrunner-adapters==0.12.4 # RUFF -ruff==0.5.4 -# BLACK-ISORT -black==24.2.0 -isort==5.13.2 +ruff==0.9.3 # CLANGFORMAT -clang-format==18.1.8 +clang-format==19.1.7 diff --git a/setup.py b/setup.py index c1580eeb9e8f9..6481f58f69070 100644 --- a/setup.py +++ b/setup.py @@ -66,6 +66,8 @@ def parse_arg_remove_string(argv, arg_name_equal): elif parse_arg_remove_boolean(sys.argv, "--use_rocm"):     is_rocm = True     rocm_version = parse_arg_remove_string(sys.argv, "--rocm_version=") +    if parse_arg_remove_boolean(sys.argv, "--use_migraphx"): +        is_migraphx = True elif parse_arg_remove_boolean(sys.argv, "--use_migraphx"):     is_migraphx = True elif parse_arg_remove_boolean(sys.argv, "--use_openvino"): @@ -90,8 +92,10 @@ def parse_arg_remove_string(argv, arg_name_equal):     is_qnn = True     package_name = "onnxruntime-qnn"  -if is_rocm or is_migraphx: -    package_name = "onnxruntime-rocm" +if is_rocm: +    package_name = "onnxruntime-rocm" if not nightly_build else
"ort-rocm-nightly" +elif is_migraphx: + package_name = "onnxruntime-migraphx" if not nightly_build else "ort-migraphx-nightly" # PEP 513 defined manylinux1_x86_64 and manylinux1_i686 # PEP 571 defined manylinux2010_x86_64 and manylinux2010_i686 @@ -311,17 +315,20 @@ def finalize_options(self): providers_tensorrt_or_migraphx = "onnxruntime_providers_" + ("migraphx" if is_migraphx else "tensorrt") providers_openvino = "onnxruntime_providers_openvino" providers_cann = "onnxruntime_providers_cann" +providers_qnn = "onnxruntime_providers_qnn" if platform.system() == "Linux": providers_cuda_or_rocm = "lib" + providers_cuda_or_rocm + ".so" providers_tensorrt_or_migraphx = "lib" + providers_tensorrt_or_migraphx + ".so" providers_openvino = "lib" + providers_openvino + ".so" providers_cann = "lib" + providers_cann + ".so" + providers_qnn = "lib" + providers_qnn + ".so" elif platform.system() == "Windows": providers_cuda_or_rocm = providers_cuda_or_rocm + ".dll" providers_tensorrt_or_migraphx = providers_tensorrt_or_migraphx + ".dll" providers_openvino = providers_openvino + ".dll" providers_cann = providers_cann + ".dll" + providers_qnn = providers_qnn + ".dll" # Additional binaries dl_libs = [] @@ -341,8 +348,9 @@ def finalize_options(self): dl_libs.append(providers_cuda_or_rocm) dl_libs.append(providers_tensorrt_or_migraphx) dl_libs.append(providers_cann) + dl_libs.append(providers_qnn) dl_libs.append("libonnxruntime.so*") - # DNNL, TensorRT & OpenVINO EPs are built as shared libs + # DNNL, TensorRT, OpenVINO, and QNN EPs are built as shared libs libs.extend(["libonnxruntime_providers_shared.so"]) libs.extend(["libonnxruntime_providers_dnnl.so"]) libs.extend(["libonnxruntime_providers_openvino.so"]) @@ -350,6 +358,7 @@ def finalize_options(self): libs.append(providers_cuda_or_rocm) libs.append(providers_tensorrt_or_migraphx) libs.append(providers_cann) + libs.append(providers_qnn) # QNN qnn_deps = [ "libQnnCpu.so", @@ -388,13 +397,14 @@ def finalize_options(self): providers_cann, "onnxruntime.dll", ] - # DNNL, TensorRT & OpenVINO EPs are built as shared libs + # DNNL, TensorRT, OpenVINO, and QNN EPs are built as shared libs libs.extend(["onnxruntime_providers_shared.dll"]) libs.extend(["onnxruntime_providers_dnnl.dll"]) libs.extend(["onnxruntime_providers_tensorrt.dll"]) libs.extend(["onnxruntime_providers_openvino.dll"]) libs.extend(["onnxruntime_providers_cuda.dll"]) libs.extend(["onnxruntime_providers_vitisai.dll"]) + libs.extend(["onnxruntime_providers_qnn.dll"]) # DirectML Libs libs.extend(["DirectML.dll"]) # QNN V68/V73 dependencies @@ -477,10 +487,10 @@ def finalize_options(self): if path.isdir(path.join("onnxruntime", "external")): # Gather all files under onnxruntime/external directory. 
extra.extend( -        list( +        [ str(Path(*Path(x).parts[1:])) for x in list(iglob(path.join(path.join("onnxruntime", "external"), "**/*.*"), recursive=True)) -        ) +        ] ) packages = [ diff --git a/tools/ci_build/amd_hipify.py b/tools/ci_build/amd_hipify.py index 6a8154681ed97..d2739cd805fb0 100644 --- a/tools/ci_build/amd_hipify.py +++ b/tools/ci_build/amd_hipify.py @@ -187,4 +187,4 @@ def hipify(hipify_perl_path, src_file_path, dst_file_path): parser.add_argument("src", help="src") args = parser.parse_args() -    hipify(args.hipify_perl, args.src, args.output) +    hipify(os.path.join(os.path.dirname(__file__), "hipify-perl"), args.src, args.output) diff --git a/tools/ci_build/build.py b/tools/ci_build/build.py index ed5efac274df0..cc733f859fe0b 100644 --- a/tools/ci_build/build.py +++ b/tools/ci_build/build.py @@ -2,6 +2,7 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # SPDX-FileCopyrightText: Copyright 2024 Arm Limited and/or its affiliates # Licensed under the MIT License. +from __future__ import annotations  import argparse import contextlib @@ -128,6 +129,17 @@ def invalid_hetero_build():     return device_read  +def _qnn_verify_library_kind(library_kind): +    choices = ["shared_lib", "static_lib"] +    if library_kind not in choices: +        print("\nYou have specified an invalid library kind for QNN EP.") +        print(f"The invalid library kind was: {library_kind}") +        print("Provide a library kind from the following options: ", choices) +        print(f"Example: --use_qnn {choices[0]}") +        sys.exit("Incorrect build configuration") +    return library_kind + + def parse_arguments():     class Parser(argparse.ArgumentParser):         # override argument file line parsing behavior - allow multiple arguments per line and handle quotes @@ -451,9 +463,7 @@ def convert_arg_line_to_args(self, arg_line):     parser.add_argument(         "--apple_deploy_target",         type=str, -        help="Specify the minimum version of the target platform " -        "(e.g. macOS or iOS)" -        "This is only supported on MacOS", +        help="Specify the minimum version of the target platform (e.g. macOS or iOS). This is only supported on macOS.",     )     # A 32-bit process doesn't have enough memory to run all the tests in onnxruntime_test_all.     # Mimalloc is incompatible with address sanitizer. @@ -579,7 +589,14 @@ def convert_arg_line_to_args(self, arg_line):     parser.add_argument("--use_jsep", action="store_true", help="Build with JavaScript kernels.")     parser.add_argument("--use_webgpu", action="store_true", help="Build with WebGPU support.")     parser.add_argument("--use_external_dawn", action="store_true", help="Treat Dawn as an external dependency.") -    parser.add_argument("--use_qnn", action="store_true", help="Build with QNN support.") +    parser.add_argument( +        "--use_qnn", +        nargs="?", +        const="shared_lib",  # If --use_qnn is provided without an argument, default to a shared library. +        type=_qnn_verify_library_kind, +        help="Build with QNN support. 
Specify 'shared_lib' or 'static_lib' to build QNN EP " +        "as a shared or static library, respectively.", +    )     parser.add_argument("--qnn_home", help="Path to QNN SDK dir.")     parser.add_argument("--use_rknpu", action="store_true", help="Build with RKNPU.")     parser.add_argument("--use_preinstalled_eigen", action="store_true", help="Use pre-installed Eigen.") @@ -765,6 +782,12 @@ def convert_arg_line_to_args(self, arg_line):     parser.add_argument("--use_triton_kernel", action="store_true", help="Use triton compiled kernels")     parser.add_argument("--use_lock_free_queue", action="store_true", help="Use lock-free task queue for threadpool.")  +    parser.add_argument( +        "--enable_generic_interface", +        action="store_true", +        help="Build the ORT shared library and compatible bridge with primary EPs (TensorRT, OpenVINO, QNN, VitisAI), but not tests", +    ) +     if not is_windows():         parser.add_argument(             "--allow_running_as_root", @@ -1025,6 +1048,12 @@ def generate_build_tree(         "-Donnxruntime_USE_TENSORRT=" + ("ON" if args.use_tensorrt else "OFF"),         "-Donnxruntime_USE_TENSORRT_BUILTIN_PARSER="         + ("ON" if args.use_tensorrt_builtin_parser and not args.use_tensorrt_oss_parser else "OFF"), +        # Interface variables are used only for building onnxruntime/onnxruntime_shared.dll, not the EPs +        "-Donnxruntime_USE_TENSORRT_INTERFACE=" + ("ON" if args.enable_generic_interface else "OFF"), +        "-Donnxruntime_USE_CUDA_INTERFACE=" + ("ON" if args.enable_generic_interface else "OFF"), +        "-Donnxruntime_USE_OPENVINO_INTERFACE=" + ("ON" if args.enable_generic_interface else "OFF"), +        "-Donnxruntime_USE_VITISAI_INTERFACE=" + ("ON" if args.enable_generic_interface else "OFF"), +        "-Donnxruntime_USE_QNN_INTERFACE=" + ("ON" if args.enable_generic_interface else "OFF"),         # set vars for migraphx         "-Donnxruntime_USE_MIGRAPHX=" + ("ON" if args.use_migraphx else "OFF"),         "-Donnxruntime_DISABLE_CONTRIB_OPS=" + ("ON" if args.disable_contrib_ops else "OFF"), @@ -1248,8 +1277,7 @@ def generate_build_tree(             cmake_args += ["-Donnxruntime_MPI_HOME=" + mpi_home]         else:             log.warning( -                "mpi_home is supplied but use_mpi is set to false." -                " Build will continue without linking MPI libraries." +                "mpi_home is supplied but use_mpi is set to false. Build will continue without linking MPI libraries."             )      if nccl_home and os.path.exists(nccl_home): @@ -1352,6 +1380,13 @@ def generate_build_tree(             raise BuildError("qnn_home=" + qnn_home + " not valid."
+ " qnn_home paths must be specified and valid.") cmake_args += ["-Donnxruntime_USE_QNN=ON"] + if args.use_qnn == "static_lib": + cmake_args += ["-Donnxruntime_BUILD_QNN_EP_STATIC_LIB=ON"] + if args.android and args.use_qnn != "static_lib": + raise BuildError("Only support Android + QNN builds with QNN EP built as a static library.") + if args.use_qnn == "static_lib" and args.enable_generic_interface: + raise BuildError("Generic ORT interface only supported with QNN EP built as a shared library.") + if args.use_coreml: cmake_args += ["-Donnxruntime_USE_COREML=ON"] @@ -1390,7 +1425,7 @@ def generate_build_tree( if not all(needed_args): raise BuildError( "iOS/MacOS framework build on MacOS canceled due to missing arguments: " - + ", ".join(val for val, cond in zip(arg_names, needed_args) if not cond) + + ", ".join(val for val, cond in zip(arg_names, needed_args, strict=False) if not cond) ) # note: this value is mainly used in framework_info.json file to specify the build osx type platform_name = "macabi" if args.macos == "Catalyst" else args.apple_sysroot @@ -1508,6 +1543,12 @@ def generate_build_tree( "-Donnxruntime_USE_FULL_PROTOBUF=ON", ] + # When this flag is enabled, that means we only build ONNXRuntime shared library, expecting some compatible EP + # shared lib being build in a seperate process. So we skip the test for now as ONNXRuntime shared lib built under + # this flag is not expected to work alone + if args.enable_generic_interface: + cmake_args += ["-Donnxruntime_BUILD_UNIT_TESTS=OFF"] + if args.enable_lazy_tensor: import torch @@ -1594,7 +1635,7 @@ def generate_build_tree( if args.parallel == 0: cflags += ["/MP"] else: - cflags += ["/MP%d" % njobs] + cflags += [f"/MP{njobs}"] # Setup default values for cflags/cxxflags/ldflags. # The values set here are purely for security and compliance purposes. ONNX Runtime should work fine without these flags. if ( @@ -2320,6 +2361,8 @@ def build_python_wheel( args.append("--use_rocm") if rocm_version: args.append(f"--rocm_version={rocm_version}") + if use_migraphx: + args.append("--use_migraphx") elif use_migraphx: args.append("--use_migraphx") elif use_openvino: @@ -2401,9 +2444,11 @@ def build_nuget_package( elif use_rocm: package_name = "/p:OrtPackageId=Microsoft.ML.OnnxRuntime.ROCm" elif use_qnn: + if use_qnn != "shared_lib": + raise BuildError("Currently NuGet packages with QNN require QNN EP to be built as a shared library.") execution_provider = "/p:ExecutionProvider=qnn" package_name = "/p:OrtPackageId=Microsoft.ML.OnnxRuntime.QNN" - elif any(map(lambda x: "OrtPackageId=" in x, msbuild_extra_options)): + elif any("OrtPackageId=" in x for x in msbuild_extra_options): pass else: # we currently only allow building with mobile targets on Windows. @@ -2624,6 +2669,9 @@ def main(): # Disable ONNX Runtime's builtin memory checker args.disable_memleak_checker = True + if args.enable_generic_interface: + args.test = False + # If there was no explicit argument saying what to do, default # to update, build and test (for native builds). 
if not (args.update or args.clean or args.build or args.test or args.gen_doc): @@ -2727,7 +2775,10 @@ def main(): source_dir = os.path.normpath(os.path.join(script_dir, "..", "..")) # if using cuda, setup cuda paths and env vars - cuda_home, cudnn_home = setup_cuda_vars(args) + cuda_home = "" + cudnn_home = "" + if args.use_cuda: + cuda_home, cudnn_home = setup_cuda_vars(args) mpi_home = args.mpi_home nccl_home = args.nccl_home @@ -2740,10 +2791,14 @@ def main(): armnn_home = args.armnn_home armnn_libs = args.armnn_libs - qnn_home = args.qnn_home + qnn_home = "" + if args.use_qnn: + qnn_home = args.qnn_home # if using tensorrt, setup tensorrt paths - tensorrt_home = setup_tensorrt_vars(args) + tensorrt_home = "" + if args.use_tensorrt: + tensorrt_home = setup_tensorrt_vars(args) # if using migraphx, setup migraphx paths migraphx_home = setup_migraphx_vars(args) @@ -2828,9 +2883,9 @@ def main(): toolset = "host=" + host_arch + ",version=" + args.msvc_toolset else: toolset = "host=" + host_arch - if args.cuda_version: + if args.use_cuda and args.cuda_version: toolset += ",cuda=" + args.cuda_version - elif args.cuda_home: + elif args.use_cuda and args.cuda_home: toolset += ",cuda=" + args.cuda_home if args.windows_sdk_version: target_arch += ",version=" + args.windows_sdk_version diff --git a/tools/ci_build/compile_triton.py b/tools/ci_build/compile_triton.py index c1119aad49ae8..abe95b31e8e37 100644 --- a/tools/ci_build/compile_triton.py +++ b/tools/ci_build/compile_triton.py @@ -93,9 +93,9 @@ def convert_and_save(metadata, header_file, out_dir, out_obj_file): lib_name = m["lib_file"].replace(".", "_") meta_ele.append(f'"_binary_{lib_name}_start"') - meta_ele.append(f"\"{m['func_name']}\"") - meta_ele.append(f"\"{m['group']}\"") - meta_ele.append(f"\"{m['name']}\"") + meta_ele.append(f'"{m["func_name"]}"') + meta_ele.append(f'"{m["group"]}"') + meta_ele.append(f'"{m["name"]}"') meta_ele.append(str(m["num_warps"])) meta_ele.append(str(m["shared"])) @@ -103,9 +103,9 @@ def convert_and_save(metadata, header_file, out_dir, out_obj_file): constants = [] for k, v in m["constants"].items(): constants.append(f'{{ "{k}", {v!s}}}') - meta_ele.append(f"{{ { ', '.join(constants) } }}") + meta_ele.append(f"{{ {', '.join(constants)} }}") - c_metadata.append(f"{{ { ', '.join(meta_ele) } }}") + c_metadata.append(f"{{ {', '.join(meta_ele)} }}") archive_obj_files(binary_files, out_dir, out_obj_file) @@ -123,7 +123,7 @@ def convert_and_save(metadata, header_file, out_dir, out_obj_file): }}; const _TritonKernelInfo kernel_infos[] = {{ - { ', '.join(c_metadata) }, + {", ".join(c_metadata)}, }}; """ diff --git a/tools/ci_build/gen_def.py b/tools/ci_build/gen_def.py index 2b7790ec4e683..76d9c9499c478 100755 --- a/tools/ci_build/gen_def.py +++ b/tools/ci_build/gen_def.py @@ -46,7 +46,7 @@ def parse_arguments(): for symbol in symbols: if args.style == "vc": - file.write(" %s @%d\n" % (symbol, symbol_index)) + file.write(f" {symbol} @{symbol_index}\n") elif args.style == "xcode": file.write(f"_{symbol}\n") else: diff --git a/tools/ci_build/get_docker_image.py b/tools/ci_build/get_docker_image.py index a3f603b0beda4..e656cedae5916 100755 --- a/tools/ci_build/get_docker_image.py +++ b/tools/ci_build/get_docker_image.py @@ -98,7 +98,6 @@ def main(): ) if use_container_registry: - run(args.docker_path, "buildx", "create", "--driver=docker-container", "--name=container_builder") run( args.docker_path, "--log-level", @@ -109,8 +108,6 @@ def main(): "--tag", full_image_name, "--cache-from=type=registry,ref=" + 
full_image_name, - "--builder", - "container_builder", "--build-arg", "BUILDKIT_INLINE_CACHE=1", *shlex.split(args.docker_build_args), diff --git a/tools/ci_build/github/android/build_aar_and_copy_artifacts.sh b/tools/ci_build/github/android/build_aar_and_copy_artifacts.sh index 29c52404dc7e3..001fa2dc188a4 100755 --- a/tools/ci_build/github/android/build_aar_and_copy_artifacts.sh +++ b/tools/ci_build/github/android/build_aar_and_copy_artifacts.sh @@ -6,7 +6,6 @@ set -e set -x -export PATH=/opt/python/cp312-cp312/bin:$PATH ls /build ls /build/deps @@ -25,7 +24,7 @@ ANDROID_SDK_HOME="/android_home" ANDROID_NDK_HOME="/ndk_home" QNN_HOME="/qnn_home" - +python3 -m pip install -r /onnxruntime_src/tools/ci_build/requirements/pybind/requirements.txt # Base command for building the AAR package COMMAND="python3 $BUILD_SCRIPT --build_dir /build --config $BUILD_CONFIG --android_sdk_path $ANDROID_SDK_HOME --android_ndk_path $ANDROID_NDK_HOME $BUILD_SETTINGS" diff --git a/tools/ci_build/github/android/build_aar_package.py b/tools/ci_build/github/android/build_aar_package.py index 1b34b3d302e57..c2bc5cba82a23 100644 --- a/tools/ci_build/github/android/build_aar_package.py +++ b/tools/ci_build/github/android/build_aar_package.py @@ -41,10 +41,7 @@ def _parse_build_settings(args): build_settings = {} - if "build_abis" in build_settings_data: - build_settings["build_abis"] = build_settings_data["build_abis"] - else: - build_settings["build_abis"] = DEFAULT_BUILD_ABIS + build_settings["build_abis"] = build_settings_data.get("build_abis", DEFAULT_BUILD_ABIS) build_params = [] if "build_params" in build_settings_data: @@ -75,11 +72,15 @@ def _parse_build_settings(args): return build_settings +def _is_qnn_android_build(build_settings): + return any(build_arg.startswith("--use_qnn") for build_arg in build_settings["build_params"]) + + def _build_aar(args): build_settings = _parse_build_settings(args) build_dir = os.path.abspath(args.build_dir) ops_config_path = os.path.abspath(args.include_ops_by_config) if args.include_ops_by_config else None - qnn_android_build = "--use_qnn" in build_settings["build_params"] + qnn_android_build = _is_qnn_android_build(build_settings) # Setup temp environment for building temp_env = os.environ.copy() diff --git a/tools/ci_build/github/android/default_qnn_aar_build_settings.json b/tools/ci_build/github/android/default_qnn_aar_build_settings.json index 599c108f830e7..5ac49f582d23e 100644 --- a/tools/ci_build/github/android/default_qnn_aar_build_settings.json +++ b/tools/ci_build/github/android/default_qnn_aar_build_settings.json @@ -2,8 +2,8 @@ "build_abis": [ "arm64-v8a" ], - "android_min_sdk_version": 21, - "android_target_sdk_version": 24, + "android_min_sdk_version": 24, + "android_target_sdk_version": 34, "build_params": [ "--enable_lto", "--android", @@ -11,7 +11,7 @@ "--cmake_generator=Ninja", "--build_java", "--build_shared_lib", - "--use_qnn", + "--use_qnn=static_lib", "--cmake_extra_defines=onnxruntime_BUILD_UNIT_TESTS=OFF", "--skip_tests" diff --git a/tools/ci_build/github/android/training_full_aar_build_settings.json b/tools/ci_build/github/android/training_full_aar_build_settings.json index 013804e2d63e9..7354bc774aa53 100644 --- a/tools/ci_build/github/android/training_full_aar_build_settings.json +++ b/tools/ci_build/github/android/training_full_aar_build_settings.json @@ -5,8 +5,8 @@ "x86", "x86_64" ], - "android_min_sdk_version": 21, - "android_target_sdk_version": 24, + "android_min_sdk_version": 24, + "android_target_sdk_version": 34, "build_params": [ 
"--enable_lto", "--android", diff --git a/tools/ci_build/github/apple/build_and_assemble_apple_pods.py b/tools/ci_build/github/apple/build_and_assemble_apple_pods.py index dd037c17ae3b3..c18cb1d0705fe 100755 --- a/tools/ci_build/github/apple/build_and_assemble_apple_pods.py +++ b/tools/ci_build/github/apple/build_and_assemble_apple_pods.py @@ -11,9 +11,10 @@ import tempfile from c.assemble_c_pod_package import assemble_c_pod_package -from objectivec.assemble_objc_pod_package import assemble_objc_pod_package from package_assembly_utils import PackageVariant, get_ort_version +from objectivec.assemble_objc_pod_package import assemble_objc_pod_package + SCRIPT_PATH = pathlib.Path(__file__).resolve() SCRIPT_DIR = SCRIPT_PATH.parent REPO_DIR = SCRIPT_PATH.parents[4] diff --git a/tools/ci_build/github/apple/default_full_apple_framework_build_settings.json b/tools/ci_build/github/apple/default_full_apple_framework_build_settings.json index a717df01dcd58..d99191e4f45d8 100644 --- a/tools/ci_build/github/apple/default_full_apple_framework_build_settings.json +++ b/tools/ci_build/github/apple/default_full_apple_framework_build_settings.json @@ -24,7 +24,7 @@ ], "macosx": [ "--macos=MacOSX", - "--apple_deploy_target=11.0" + "--apple_deploy_target=13.3" ], "iphoneos": [ "--ios", diff --git a/tools/ci_build/github/apple/default_training_ios_framework_build_settings.json b/tools/ci_build/github/apple/default_training_ios_framework_build_settings.json index 5a4026697d14c..e35ddb93a173d 100644 --- a/tools/ci_build/github/apple/default_training_ios_framework_build_settings.json +++ b/tools/ci_build/github/apple/default_training_ios_framework_build_settings.json @@ -33,7 +33,7 @@ ], "macosx": [ "--macos=MacOSX", - "--apple_deploy_target=11.0" + "--apple_deploy_target=13.3" ] } } diff --git a/tools/ci_build/github/apple/objectivec/assemble_objc_pod_package.py b/tools/ci_build/github/apple/objectivec/assemble_objc_pod_package.py index b7eb34cb09219..a829d244ad6e0 100755 --- a/tools/ci_build/github/apple/objectivec/assemble_objc_pod_package.py +++ b/tools/ci_build/github/apple/objectivec/assemble_objc_pod_package.py @@ -93,8 +93,8 @@ def get_pod_files(package_variant: PackageVariant): else: # return files that are in pod_files but not in training_only_objc_files filtered_pod_files = {} - for key in all_objc_files: - filtered_pod_files[key] = filter_files(all_objc_files[key], training_only_objc_files[key]) + for key, value in all_objc_files.items(): + filtered_pod_files[key] = filter_files(value, training_only_objc_files[key]) return filtered_pod_files diff --git a/tools/ci_build/github/apple/package_assembly_utils.py b/tools/ci_build/github/apple/package_assembly_utils.py index c6822466d73d0..829bca8c743df 100644 --- a/tools/ci_build/github/apple/package_assembly_utils.py +++ b/tools/ci_build/github/apple/package_assembly_utils.py @@ -7,7 +7,6 @@ import pathlib import re import shutil -from typing import Dict, List _script_dir = pathlib.Path(__file__).parent.resolve(strict=True) repo_root = _script_dir.parents[3] @@ -30,7 +29,7 @@ def all_variant_names(cls): def gen_file_from_template( - template_file: pathlib.Path, output_file: pathlib.Path, variable_substitutions: Dict[str, str], strict: bool = True + template_file: pathlib.Path, output_file: pathlib.Path, variable_substitutions: dict[str, str], strict: bool = True ): """ Generates a file from a template file. 
@@ -69,7 +68,7 @@ def replace_template_variable(match): output.write(content) -def filter_files(all_file_patterns: List[str], excluded_file_patterns: List[str]): +def filter_files(all_file_patterns: list[str], excluded_file_patterns: list[str]): """ Filters file paths based on inclusion and exclusion patterns @@ -90,7 +89,7 @@ def filter_files(all_file_patterns: List[str], excluded_file_patterns: List[str] return list(set(all_files) - set(exclude_files)) -def copy_repo_relative_to_dir(patterns: List[str], dest_dir: pathlib.Path): +def copy_repo_relative_to_dir(patterns: list[str], dest_dir: pathlib.Path): """ Copies file paths relative to the repo root to a directory. The given paths or path patterns are relative to the repo root, and the diff --git a/tools/ci_build/github/apple/package_release_tasks.py b/tools/ci_build/github/apple/package_release_tasks.py index 592a326d86ba2..c8d78400c6ff0 100755 --- a/tools/ci_build/github/apple/package_release_tasks.py +++ b/tools/ci_build/github/apple/package_release_tasks.py @@ -52,8 +52,7 @@ def _resolve_single_path_from_pattern(path_pattern: str) -> Path: def _parse_args(): parser = argparse.ArgumentParser( - description="Helper script to perform release tasks. " - "Mostly useful for the CocoaPods package release pipeline.", + description="Helper script to perform release tasks. Mostly useful for the CocoaPods package release pipeline.", ) parser.add_argument( diff --git a/tools/ci_build/github/azure-pipelines/android-arm64-v8a-QNN-crosscompile-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/android-arm64-v8a-QNN-crosscompile-ci-pipeline.yml index c3dbee336b69d..f237ef37fe82c 100644 --- a/tools/ci_build/github/azure-pipelines/android-arm64-v8a-QNN-crosscompile-ci-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/android-arm64-v8a-QNN-crosscompile-ci-pipeline.yml @@ -32,7 +32,7 @@ parameters: - name: QnnSdk displayName: QNN SDK version type: string - default: 2.28.2.241116 + default: 2.30.0.250109 jobs: - job: Build_QNN_EP @@ -72,7 +72,8 @@ jobs: --android_abi=x86_64 \ --android_api=31 \ --parallel \ - --use_qnn \ + --build_shared_lib \ + --use_qnn static_lib \ --qnn_home $(QnnSDKRootDir) \ --cmake_generator=Ninja \ --skip_tests diff --git a/tools/ci_build/github/azure-pipelines/bigmodels-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/bigmodels-ci-pipeline.yml index 56cb26f61dbb5..0eaaea562ca36 100644 --- a/tools/ci_build/github/azure-pipelines/bigmodels-ci-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/bigmodels-ci-pipeline.yml @@ -41,7 +41,7 @@ parameters: variables: - name: docker_base_image - value: onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda11_x64_almalinux8_gcc11:20250108.1 + value: onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda11_x64_almalinux8_gcc11:20250124.1 - name: linux_trt_version value: 10.3.0.26-1.cuda11.8 - name: Repository diff --git a/tools/ci_build/github/azure-pipelines/binary-size-checks-pipeline.yml b/tools/ci_build/github/azure-pipelines/binary-size-checks-pipeline.yml index 74866cfd59b52..ca7ef2e49cdf6 100644 --- a/tools/ci_build/github/azure-pipelines/binary-size-checks-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/binary-size-checks-pipeline.yml @@ -4,14 +4,6 @@ parameters: type: boolean default: false -resources: - repositories: - - repository: manylinux - type: Github - endpoint: Microsoft - name: pypa/manylinux - ref: 5eda9aded5462201e6310105728d33016e637ea7 - stages: - template: 
templates/android-binary-size-check-stage.yml parameters: diff --git a/tools/ci_build/github/azure-pipelines/build-perf-test-binaries-pipeline.yml b/tools/ci_build/github/azure-pipelines/build-perf-test-binaries-pipeline.yml index 5adaa686f6c0f..0ce4227c9ef9f 100644 --- a/tools/ci_build/github/azure-pipelines/build-perf-test-binaries-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/build-perf-test-binaries-pipeline.yml @@ -6,14 +6,6 @@ parameters: type: boolean default: true -resources: - repositories: - - repository: manylinux - type: Github - endpoint: Microsoft - name: pypa/manylinux - ref: 5eda9aded5462201e6310105728d33016e637ea7 - stages: # build binaries for Android diff --git a/tools/ci_build/github/azure-pipelines/c-api-noopenmp-packaging-pipelines.yml b/tools/ci_build/github/azure-pipelines/c-api-noopenmp-packaging-pipelines.yml index cbc3aa705b4f9..781c5964138f8 100644 --- a/tools/ci_build/github/azure-pipelines/c-api-noopenmp-packaging-pipelines.yml +++ b/tools/ci_build/github/azure-pipelines/c-api-noopenmp-packaging-pipelines.yml @@ -62,7 +62,7 @@ parameters: - name: QnnSdk displayName: QNN SDK Version type: string - default: 2.28.0.241029 + default: 2.30.0.250109 resources: repositories: @@ -70,11 +70,6 @@ resources: type: github endpoint: ort-examples name: microsoft/onnxruntime-inference-examples - - repository: manylinux - type: Github - endpoint: Microsoft - name: pypa/manylinux - ref: 5eda9aded5462201e6310105728d33016e637ea7 variables: - template: templates/common-variables.yml diff --git a/tools/ci_build/github/azure-pipelines/cuda-packaging-pipeline.yml b/tools/ci_build/github/azure-pipelines/cuda-packaging-pipeline.yml index bc33aba57ec93..b24310ac0c3e0 100644 --- a/tools/ci_build/github/azure-pipelines/cuda-packaging-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/cuda-packaging-pipeline.yml @@ -78,11 +78,7 @@ resources: type: github endpoint: ort-examples name: microsoft/onnxruntime-inference-examples - - repository: manylinux - type: Github - endpoint: Microsoft - name: pypa/manylinux - ref: 5eda9aded5462201e6310105728d33016e637ea7 + stages: # Set ReleaseVersionSuffix diff --git a/tools/ci_build/github/azure-pipelines/linux-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/linux-ci-pipeline.yml index 0c10d404931e4..dc57fd41fa5f9 100644 --- a/tools/ci_build/github/azure-pipelines/linux-ci-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/linux-ci-pipeline.yml @@ -79,7 +79,7 @@ stages: onnxruntimecpubuildcentos8x64 \ /bin/bash -c ' set -ex; \ - python3.12 /onnxruntime_src/tools/ci_build/build.py \ + python3 /onnxruntime_src/tools/ci_build/build.py \ --build_dir /build --cmake_generator 'Ninja' \ --config Debug \ --skip_submodule_sync \ @@ -87,7 +87,7 @@ stages: --parallel --use_binskim_compliant_compile_flags \ --enable_onnx_tests --enable_address_sanitizer \ --update --build; - python3.12 /onnxruntime_src/tools/ci_build/build.py \ + python3 /onnxruntime_src/tools/ci_build/build.py \ --build_dir /build --cmake_generator 'Ninja' \ --config Debug \ --skip_submodule_sync \ diff --git a/tools/ci_build/github/azure-pipelines/linux-cpu-minimal-build-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/linux-cpu-minimal-build-ci-pipeline.yml index e74f1968e3be3..c323e51035c23 100644 --- a/tools/ci_build/github/azure-pipelines/linux-cpu-minimal-build-ci-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/linux-cpu-minimal-build-ci-pipeline.yml @@ -67,10 +67,10 @@ jobs: - template: templates/get-docker-image-steps.yml parameters: - 
Dockerfile: tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_cpu - Context: tools/ci_build/github/linux/docker + Dockerfile: tools/ci_build/github/linux/docker/inference/x86_64/default/cpu/Dockerfile + Context: tools/ci_build/github/linux/docker/inference/x86_64/default/cpu DockerBuildArgs: "--build-arg BUILD_UID=$( id -u )" - Repository: onnxruntimecpubuild + Repository: onnxruntimecpubuildcentos8x64_packaging - task: CmdLine@2 displayName: Create test data directory @@ -104,7 +104,7 @@ jobs: -e BUILD_BUILDNUMBER \ -e CCACHE_DIR=/cache \ -e ORT_BUILD_WITH_CACHE=1 \ - onnxruntimecpubuild \ + onnxruntimecpubuildcentos8x64_packaging \ /bin/bash -c " set -e -x; /onnxruntime_src/tools/ci_build/github/linux/ort_minimal/build_full_ort_and_create_ort_files.sh /build/1; \ @@ -124,8 +124,8 @@ jobs: -e ALLOW_RELEASED_ONNX_OPSET_ONLY=1 \ -e NIGHTLY_BUILD \ -e BUILD_BUILDNUMBER \ - onnxruntimecpubuild \ - /opt/python/cp310-cp310/bin/python3.10 /onnxruntime_src/tools/ci_build/build.py \ + onnxruntimecpubuildcentos8x64_packaging \ + bash -c "python3 -m pip install -r /onnxruntime_src/tools/ci_build/requirements/pybind/requirements.txt && python3 /onnxruntime_src/tools/ci_build/build.py \ --build_dir /build/2 --cmake_generator Ninja \ --config Debug \ --skip_submodule_sync \ @@ -134,7 +134,7 @@ jobs: --skip_tests \ --minimal_build \ --disable_exceptions \ - --enable_training_ops + --enable_training_ops" workingDirectory: $(Build.SourcesDirectory) - task: CmdLine@2 @@ -148,7 +148,7 @@ jobs: -e ALLOW_RELEASED_ONNX_OPSET_ONLY=1 \ -e NIGHTLY_BUILD \ -e BUILD_BUILDNUMBER \ - onnxruntimecpubuild \ + onnxruntimecpubuildcentos8x64_packaging \ /bin/bash /onnxruntime_src/tools/ci_build/github/linux/ort_minimal/build_minimal_ort_and_run_tests.sh \ --build-directory /build/3a \ --reduced-ops-config /home/onnxruntimedev/.test_data/required_ops.ort_models.config \ @@ -166,7 +166,7 @@ jobs: -e ALLOW_RELEASED_ONNX_OPSET_ONLY=1 \ -e NIGHTLY_BUILD \ -e BUILD_BUILDNUMBER \ - onnxruntimecpubuild \ + onnxruntimecpubuildcentos8x64_packaging \ /bin/bash /onnxruntime_src/tools/ci_build/github/linux/ort_minimal/build_minimal_ort_and_run_tests.sh \ --build-directory /build/3b \ --reduced-ops-config /home/onnxruntimedev/.test_data/required_ops_and_types.ort_models.config \ @@ -188,7 +188,7 @@ jobs: -e ALLOW_RELEASED_ONNX_OPSET_ONLY=1 \ -e NIGHTLY_BUILD \ -e BUILD_BUILDNUMBER \ - onnxruntimecpubuild \ + onnxruntimecpubuildcentos8x64_packaging \ /bin/bash /onnxruntime_src/tools/ci_build/github/linux/ort_minimal/build_minimal_ort_and_run_tests.sh \ --build-directory /build/4 \ --reduced-ops-config /home/onnxruntimedev/.test_data/globally_allowed_types.config \ @@ -206,14 +206,14 @@ jobs: -e ALLOW_RELEASED_ONNX_OPSET_ONLY=1 \ -e NIGHTLY_BUILD \ -e BUILD_BUILDNUMBER \ - onnxruntimecpubuild \ - /opt/python/cp310-cp310/bin/python3.10 /onnxruntime_src/tools/ci_build/build.py \ + onnxruntimecpubuildcentos8x64_packaging \ + bash -c "python3 -m pip install -r /onnxruntime_src/tools/ci_build/requirements/pybind/requirements.txt && python3 /onnxruntime_src/tools/ci_build/build.py \ --build_dir /build/5 --cmake_generator Ninja \ --config Debug \ --skip_submodule_sync \ --build_shared_lib --use_binskim_compliant_compile_flags \ --parallel \ - --minimal_build extended + --minimal_build extended" workingDirectory: $(Build.SourcesDirectory) - task: CmdLine@2 @@ -227,8 +227,8 @@ jobs: -e ALLOW_RELEASED_ONNX_OPSET_ONLY=1 \ -e NIGHTLY_BUILD \ -e BUILD_BUILDNUMBER \ - onnxruntimecpubuild \ - /opt/python/cp310-cp310/bin/python3.10 
/onnxruntime_src/tools/ci_build/build.py \ + onnxruntimecpubuildcentos8x64_packaging \ + bash -c "python3 -m pip install -r /onnxruntime_src/tools/ci_build/requirements/pybind/requirements.txt && python3 /onnxruntime_src/tools/ci_build/build.py \ --build_dir /build/6a \ --cmake_generator Ninja \ --config MinSizeRel \ @@ -240,7 +240,7 @@ jobs: --disable_ml_ops \ --disable_types sparsetensor float8 optional \ --include_ops_by_config /home/onnxruntimedev/.test_data/include_no_operators.config \ - --cmake_extra_defines onnxruntime_BUILD_UNIT_TESTS=OFF + --cmake_extra_defines onnxruntime_BUILD_UNIT_TESTS=OFF" workingDirectory: $(Build.SourcesDirectory) - task: CmdLine@2 @@ -254,8 +254,8 @@ jobs: -e ALLOW_RELEASED_ONNX_OPSET_ONLY=1 \ -e NIGHTLY_BUILD \ -e BUILD_BUILDNUMBER \ - onnxruntimecpubuild \ - /opt/python/cp310-cp310/bin/python3.10 /onnxruntime_src/tools/ci_build/build.py \ + onnxruntimecpubuildcentos8x64_packaging \ + bash -c "python3 -m pip install -r /onnxruntime_src/tools/ci_build/requirements/pybind/requirements.txt && python3 /onnxruntime_src/tools/ci_build/build.py \ --build_dir /build/6b \ --cmake_generator Ninja \ --config MinSizeRel \ @@ -269,7 +269,7 @@ jobs: --enable_reduced_operator_type_support \ --disable_types sparsetensor optional float8 \ --include_ops_by_config /home/onnxruntimedev/.test_data/include_no_operators.config \ - --cmake_extra_defines onnxruntime_BUILD_UNIT_TESTS=OFF + --cmake_extra_defines onnxruntime_BUILD_UNIT_TESTS=OFF" workingDirectory: $(Build.SourcesDirectory) - task: CmdLine@2 @@ -283,8 +283,8 @@ jobs: -e ALLOW_RELEASED_ONNX_OPSET_ONLY=1 \ -e NIGHTLY_BUILD \ -e BUILD_BUILDNUMBER \ - onnxruntimecpubuild \ - /opt/python/cp310-cp310/bin/python3.10 /onnxruntime_src/tools/ci_build/build.py \ + onnxruntimecpubuildcentos8x64_packaging \ + bash -c "python3 -m pip install -r /onnxruntime_src/tools/ci_build/requirements/pybind/requirements.txt && python3 /onnxruntime_src/tools/ci_build/build.py \ --build_dir /build/6c \ --cmake_generator Ninja \ --config MinSizeRel \ @@ -298,7 +298,7 @@ jobs: --enable_reduced_operator_type_support \ --disable_types sparsetensor optional float8 \ --include_ops_by_config /home/onnxruntimedev/.test_data/include_no_operators.config \ - --cmake_extra_defines onnxruntime_BUILD_UNIT_TESTS=OFF + --cmake_extra_defines onnxruntime_BUILD_UNIT_TESTS=OFF" workingDirectory: $(Build.SourcesDirectory) - task: CmdLine@2 @@ -313,8 +313,8 @@ jobs: --volume $NDK_HOME:/ndk_home \ -e ALLOW_RELEASED_ONNX_OPSET_ONLY=1 \ -e NIGHTLY_BUILD \ - onnxruntimecpubuild \ - /opt/python/cp310-cp310/bin/python3.10 /onnxruntime_src/tools/ci_build/build.py \ + onnxruntimecpubuildcentos8x64_packaging \ + bash -c "python3 -m pip install -r /onnxruntime_src/tools/ci_build/requirements/pybind/requirements.txt && python3 /onnxruntime_src/tools/ci_build/build.py \ --build_dir /build/7 \ --cmake_generator Ninja \ --config MinSizeRel \ @@ -330,7 +330,7 @@ jobs: --build_shared_lib \ --disable_ml_ops \ --disable_exceptions \ - --skip_tests --path_to_protoc_exe /usr/bin/protoc + --skip_tests" workingDirectory: $(Build.SourcesDirectory) - template: templates/explicitly-defined-final-tasks.yml diff --git a/tools/ci_build/github/azure-pipelines/linux-gpu-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/linux-gpu-ci-pipeline.yml index 0f8517d12ec74..71f7ab6e49b70 100644 --- a/tools/ci_build/github/azure-pipelines/linux-gpu-ci-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/linux-gpu-ci-pipeline.yml @@ -49,9 +49,9 @@ parameters: variables: - name: docker_base_image 
${{ if eq(parameters.CudaVersion, '11.8') }}: - value: onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda11_x64_almalinux8_gcc11:20250108.1 + value: onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda11_x64_almalinux8_gcc11:20250124.1 ${{ if eq(parameters.CudaVersion, '12.2') }}: - value: onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda12_x64_ubi8_gcc12:20250108.1 + value: onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda12_x64_ubi8_gcc12:20250124.1 - name: Repository ${{ if eq(parameters.CudaVersion, '11.8') }}: diff --git a/tools/ci_build/github/azure-pipelines/linux-gpu-tensorrt-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/linux-gpu-tensorrt-ci-pipeline.yml index 9d306aaf5019c..c08eaaaa1308d 100644 --- a/tools/ci_build/github/azure-pipelines/linux-gpu-tensorrt-ci-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/linux-gpu-tensorrt-ci-pipeline.yml @@ -40,9 +40,9 @@ variables: - template: templates/common-variables.yml - name: docker_base_image ${{ if eq(parameters.CudaVersion, '11.8') }}: - value: onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda11_x64_almalinux8_gcc11:20250108.1 + value: onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda11_x64_almalinux8_gcc11:20250124.1 ${{ if eq(parameters.CudaVersion, '12.2') }}: - value: onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda12_x64_ubi8_gcc12:20250108.1 + value: onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda12_x64_ubi8_gcc12:20250124.1 - name: linux_trt_version ${{ if eq(parameters.CudaVersion, '11.8') }}: value: ${{ variables.linux_trt_version_cuda11 }} diff --git a/tools/ci_build/github/azure-pipelines/linux-gpu-tensorrt-cuda-minimal-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/linux-gpu-tensorrt-cuda-minimal-ci-pipeline.yml index 08032c75d67f5..4a86da167ff1f 100644 --- a/tools/ci_build/github/azure-pipelines/linux-gpu-tensorrt-cuda-minimal-ci-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/linux-gpu-tensorrt-cuda-minimal-ci-pipeline.yml @@ -40,9 +40,9 @@ variables: - template: templates/common-variables.yml - name: docker_base_image ${{ if eq(parameters.CudaVersion, '11.8') }}: - value: onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda11_x64_almalinux8_gcc11:20250108.1 + value: onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda11_x64_almalinux8_gcc11:20250124.1 ${{ if eq(parameters.CudaVersion, '12.2') }}: - value: onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda12_x64_ubi8_gcc12:20250108.1 + value: onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda12_x64_ubi8_gcc12:20250124.1 - name: linux_trt_version ${{ if eq(parameters.CudaVersion, '11.8') }}: value: ${{ variables.linux_trt_version_cuda11 }} diff --git a/tools/ci_build/github/azure-pipelines/linux-qnn-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/linux-qnn-ci-pipeline.yml index d3826d90f9073..093db011e44f9 100644 --- a/tools/ci_build/github/azure-pipelines/linux-qnn-ci-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/linux-qnn-ci-pipeline.yml @@ -33,7 +33,7 @@ parameters: - name: QnnSdk displayName: QNN SDK version type: string - default: 2.28.2.241116 + default: 2.30.0.250109 jobs: - job: Build_QNN_EP @@ -41,7 +41,12 @@ jobs: timeoutInMinutes: 60 workspace: clean: all - + strategy: + matrix: + SHARED_LIB: + QnnLibKind: 'shared_lib' + STATIC_LIB: + 
QnnLibKind: 'static_lib' steps: - script: | ls -R /data/qnn_test_data @@ -65,7 +70,8 @@ jobs: --config Release \ --use_binskim_compliant_compile_flags \ --build_java \ - --use_qnn \ + --build_shared_lib \ + --use_qnn $(QnnLibKind) \ --qnn_home $(QnnSDKRootDir) \ --cmake_generator=Ninja \ --update --build --parallel @@ -77,7 +83,8 @@ jobs: --config Release \ --use_binskim_compliant_compile_flags \ --build_java \ - --use_qnn \ + --build_shared_lib \ + --use_qnn $(QnnLibKind) \ --qnn_home $(QnnSDKRootDir) \ --cmake_generator=Ninja \ --test diff --git a/tools/ci_build/github/azure-pipelines/mac-react-native-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/mac-react-native-ci-pipeline.yml index ba9610ffee793..e72f088cfeb55 100644 --- a/tools/ci_build/github/azure-pipelines/mac-react-native-ci-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/mac-react-native-ci-pipeline.yml @@ -39,14 +39,6 @@ parameters: - 'custom' default: 'nightly (@dev)' -resources: - repositories: - - repository: manylinux # The name used to reference this repository in the checkout step - type: Github - endpoint: Microsoft - name: pypa/manylinux - ref: 5eda9aded5462201e6310105728d33016e637ea7 - variables: skipComponentGovernanceDetection: true ${{ if eq(parameters.NpmPublish, 'nightly (@dev)') }}: diff --git a/tools/ci_build/github/azure-pipelines/npm-packaging-pipeline.yml b/tools/ci_build/github/azure-pipelines/npm-packaging-pipeline.yml index f2bd7f2b57d5a..73719426e2875 100644 --- a/tools/ci_build/github/azure-pipelines/npm-packaging-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/npm-packaging-pipeline.yml @@ -61,7 +61,8 @@ stages: - stage: Download_Node_Package_And_Publish_Validation_Script dependsOn: - - ReactNative_CI + - ReactNative_CI_Android + - ReactNative_CI_iOS - Build_web_Release - Build_web_Debug jobs: @@ -71,9 +72,11 @@ stages: runCodesignValidationInjection: false timeoutInMinutes: 10 steps: +# This pipeline is usually triggered by the Zip-Nuget-Java-Nodejs Packaging Pipeline; +# the NPM_packages artifact comes from Android_Java_API_AAR_Packaging_QNN, not from RN_CI. - download: build artifact: 'NPM_packages' - displayName: 'Download onnxruntime-node Pipeline Artifact' + displayName: 'Download NPM_packages artifact from the Zip-Nuget-Java-Nodejs Packaging Pipeline' - task: CopyFiles@2 inputs: diff --git a/tools/ci_build/github/azure-pipelines/py-cuda-alt-package-test-pipeline.yml b/tools/ci_build/github/azure-pipelines/py-cuda-alt-package-test-pipeline.yml index d5cd95a9b5508..960b59f93bee0 100644 --- a/tools/ci_build/github/azure-pipelines/py-cuda-alt-package-test-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/py-cuda-alt-package-test-pipeline.yml @@ -18,7 +18,7 @@ stages: machine_pool: 'Onnxruntime-Linux-GPU' python_wheel_suffix: '_gpu' timeout: 480 - docker_base_image: onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda11_x64_almalinux8_gcc11:20250108.1 + docker_base_image: onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda11_x64_almalinux8_gcc11:20250124.1 cuda_version: '11.8' - stage: Republish_Wheels diff --git a/tools/ci_build/github/azure-pipelines/py-cuda-package-test-pipeline.yml b/tools/ci_build/github/azure-pipelines/py-cuda-package-test-pipeline.yml index 00153f06b6e12..021f7c5ece140 100644 --- a/tools/ci_build/github/azure-pipelines/py-cuda-package-test-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/py-cuda-package-test-pipeline.yml @@ -18,7 +18,7 @@ stages: machine_pool: 'Onnxruntime-Linux-GPU' python_wheel_suffix:
'_gpu' timeout: 480 - docker_base_image: onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda12_x64_ubi8_gcc12:20250108.1 + docker_base_image: onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda12_x64_ubi8_gcc12:20250124.1 cuda_version: '12.2' - stage: Republish_Wheels diff --git a/tools/ci_build/github/azure-pipelines/py-package-build-pipeline.yml b/tools/ci_build/github/azure-pipelines/py-package-build-pipeline.yml deleted file mode 100644 index afa0ad6f4cbc7..0000000000000 --- a/tools/ci_build/github/azure-pipelines/py-package-build-pipeline.yml +++ /dev/null @@ -1,62 +0,0 @@ -parameters: -- name: enable_linux_cpu - displayName: 'Whether Linux CPU package is built.' - type: boolean - default: true - -- name: enable_linux_gpu - displayName: 'Whether Linux GPU package is built.' - type: boolean - default: true - -- name: enable_windows_cpu - displayName: 'Whether Windows CPU package is built.' - type: boolean - default: true - -- name: enable_windows_gpu - displayName: 'Whether Windows GPU package is built.' - type: boolean - default: true - -- name: python_version - displayName: 'Python version used for build' - type: object - default: ["'3.8'"] - -- name: cpu_build_py_parameters - displayName: 'Extra parameters to pass to build.py for CPU package.' - type: string - default: '--use_openvino CPU' - -- name: gpu_build_py_parameters - displayName: 'Extra parameters to pass to build.py for GPU package.' - type: string - default: ' ' - -- name: ubuntu_version_linux_cpu - displayName: 'Ubuntu Version for Linux CPU package.' - type: string - default: '20.04' - -trigger: none - -resources: - repositories: - - repository: manylinux - type: Github - endpoint: Microsoft - name: pypa/manylinux - ref: 5eda9aded5462201e6310105728d33016e637ea7 - -stages: -- template: templates/py-packaging-selectable-stage.yml - parameters: - enable_linux_gpu: ${{ parameters.enable_linux_gpu }} - enable_linux_cpu: ${{ parameters.enable_linux_cpu }} - enable_windows_cpu: ${{ parameters.enable_windows_cpu }} - enable_windows_gpu: ${{ parameters.enable_windows_gpu }} - python_version: ${{ parameters.python_version }} - cpu_build_py_parameters: ${{ parameters.cpu_build_py_parameters }} - gpu_build_py_parameters: ${{ parameters.gpu_build_py_parameters }} - ubuntu_version_linux_cpu: ${{ parameters.ubuntu_version_linux_cpu }} diff --git a/tools/ci_build/github/azure-pipelines/py-packaging-pipeline.yml b/tools/ci_build/github/azure-pipelines/py-packaging-pipeline.yml index bd33282fd494e..57f7167ff1c6e 100644 --- a/tools/ci_build/github/azure-pipelines/py-packaging-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/py-packaging-pipeline.yml @@ -59,7 +59,7 @@ parameters: - name: qnn_sdk_version type: string displayName: 'QNN SDK version. Only for QNN packages.' 
- default: 2.28.2.241116 + default: 2.30.0.250109 trigger: none diff --git a/tools/ci_build/github/azure-pipelines/qnn-ep-nuget-packaging-pipeline.yml b/tools/ci_build/github/azure-pipelines/qnn-ep-nuget-packaging-pipeline.yml index d54b8018c232a..2b35afdfdb15c 100644 --- a/tools/ci_build/github/azure-pipelines/qnn-ep-nuget-packaging-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/qnn-ep-nuget-packaging-pipeline.yml @@ -2,7 +2,7 @@ parameters: - name: QnnSdk displayName: QNN SDK Version type: string - default: 2.28.2.241116 + default: 2.30.0.250109 - name: build_config displayName: Build Configuration diff --git a/tools/ci_build/github/azure-pipelines/stages/java-cuda-packaging-stage.yml b/tools/ci_build/github/azure-pipelines/stages/java-cuda-packaging-stage.yml index b9963b1c713e1..b081b39ad9bcc 100644 --- a/tools/ci_build/github/azure-pipelines/stages/java-cuda-packaging-stage.yml +++ b/tools/ci_build/github/azure-pipelines/stages/java-cuda-packaging-stage.yml @@ -142,9 +142,9 @@ stages: value: false - name: docker_base_image ${{ if eq(parameters.CudaVersion, '11.8') }}: - value: onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda11_x64_almalinux8_gcc11:20250108.1 + value: onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda11_x64_almalinux8_gcc11:20250124.1 ${{ if eq(parameters.CudaVersion, '12.2') }}: - value: onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda12_x64_ubi8_gcc12:20250108.1 + value: onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda12_x64_ubi8_gcc12:20250124.1 timeoutInMinutes: 60 steps: diff --git a/tools/ci_build/github/azure-pipelines/stages/jobs/py-linux-cuda-package-test-job.yml b/tools/ci_build/github/azure-pipelines/stages/jobs/py-linux-cuda-package-test-job.yml index 1667a724c91c0..85366ffc28b3a 100644 --- a/tools/ci_build/github/azure-pipelines/stages/jobs/py-linux-cuda-package-test-job.yml +++ b/tools/ci_build/github/azure-pipelines/stages/jobs/py-linux-cuda-package-test-job.yml @@ -45,9 +45,9 @@ jobs: - template: ../../templates/common-variables.yml - name: docker_base_image ${{ if eq(parameters.CudaVersion, '11.8') }}: - value: onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda11_x64_almalinux8_gcc11:20250108.1 + value: onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda11_x64_almalinux8_gcc11:20250124.1 ${{ if eq(parameters.CudaVersion, '12.2') }}: - value: onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda12_x64_ubi8_gcc12:20250108.1 + value: onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda12_x64_ubi8_gcc12:20250124.1 - name: linux_trt_version ${{ if eq(parameters.CudaVersion, '11.8') }}: value: ${{ variables.linux_trt_version_cuda11 }} diff --git a/tools/ci_build/github/azure-pipelines/stages/jobs/react-natvie-andriod-e2e-test-job.yml b/tools/ci_build/github/azure-pipelines/stages/jobs/react-natvie-andriod-e2e-test-job.yml new file mode 100644 index 0000000000000..5a060a8fcd4fa --- /dev/null +++ b/tools/ci_build/github/azure-pipelines/stages/jobs/react-natvie-andriod-e2e-test-job.yml @@ -0,0 +1,211 @@ +parameters: +- name: PackageName + displayName: 'Package name' + type: string + default: 'NPM_packages' + +- name: ArtifactName + type: string + default: 'onnxruntime-android-full-aar' + +- name: NpmPackagingMode + displayName: 'NPM packages publish configuration' + type: string + default: 'dev' + +jobs: +- job: ReactNative_CI_Android + pool: 'onnxruntime-Ubuntu2204-AMD-CPU' + 
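For orientation, this new job template is consumed from react-native-ci.yml (the wiring appears further down in this diff); a sketch of the call site, with the template's default parameter values spelled out explicitly:

  - stage: ReactNative_CI_Android
    dependsOn: Build_Android_Packages
    jobs:
    - template: ../stages/jobs/react-natvie-andriod-e2e-test-job.yml
      parameters:
        PackageName: 'NPM_packages'
        ArtifactName: 'onnxruntime-android-full-aar'
        NpmPackagingMode: 'dev'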
variables: + runCodesignValidationInjection: false + timeoutInMinutes: 90 + steps: + - task: UsePythonVersion@0 + displayName: Use python 3.12 + inputs: + versionSpec: "3.12" + addToPath: true + architecture: "x64" + + - task: JavaToolInstaller@0 + displayName: Use jdk 17 + inputs: + versionSpec: '17' + jdkArchitectureOption: 'x64' + jdkSourceOption: 'PreInstalled' + + - task: NodeTool@0 + inputs: + versionSpec: '20.x' + + - script: | + sudo apt install coreutils ninja-build nodejs npm yarn + npm install --global yarn + displayName: Install coreutils, ninja, npm, and yarn + + - task: DownloadPipelineArtifact@2 + inputs: + buildType: 'current' + artifactName: '${{parameters.ArtifactName}}' + targetPath: '$(Build.BinariesDirectory)/android-full-aar' + displayName: Download Android AAR artifacts + + - task: CopyFiles@2 + inputs: + sourceFolder: $(Build.BinariesDirectory)/android-full-aar + contents: onnxruntime-android-*.aar + targetFolder: $(Build.SourcesDirectory)/js/react_native/android/libs + displayName: Copy Android package to React Native directory + + - script: | + npm ci + workingDirectory: '$(Build.SourcesDirectory)/js' + displayName: npm ci js + + - script: | + npm ci + workingDirectory: '$(Build.SourcesDirectory)/js/common' + displayName: npm ci js/common + + - script: | + yarn + workingDirectory: '$(Build.SourcesDirectory)/js/react_native' + displayName: yarn js/react_native + + - task: PowerShell@2 + inputs: + filePath: '$(Build.SourcesDirectory)/tools/ci_build/github/js/pack-npm-packages.ps1' + arguments: '"-dev.$(Get-Date -Format yyyyMMdd)-$(git rev-parse --short HEAD)" $(Build.SourcesDirectory) react_native' + workingDirectory: '$(Build.SourcesDirectory)' + errorActionPreference: stop + env: + ORT_JS_PACK_MODE: e2e + displayName: Pack NPM packages + + - script: | + mv $(Build.SourcesDirectory)/js/common/onnxruntime-common*.tgz onnxruntime-common.tgz + yarn add --no-lockfile file:./onnxruntime-common.tgz + mv $(Build.SourcesDirectory)/js/react_native/onnxruntime-react-native*.tgz onnxruntime-react-native.tgz + yarn add --no-lockfile file:./onnxruntime-react-native.tgz + yarn + workingDirectory: '$(Build.SourcesDirectory)/js/react_native/e2e' + displayName: Bootstrap Android and iOS e2e tests + + - script: | + yarn add --dev jest-junit + workingDirectory: '$(Build.SourcesDirectory)/js/react_native/e2e' + displayName: install jest junit reporter js/react_native/e2e + + - script: | + keytool -genkey -v -keystore debug.keystore -alias androiddebugkey -storepass android \ + -keypass android -keyalg RSA -keysize 2048 -validity 999999 -dname "CN=Android Debug,O=Android,C=US" + workingDirectory: '$(Build.SourcesDirectory)/js/react_native/e2e/android' + displayName: Generate a debug keystore + + - task: CopyFiles@2 + inputs: + sourceFolder: $(Build.BinariesDirectory)/android-full-aar + contents: onnxruntime-*.aar + targetFolder: $(Build.SourcesDirectory)/js/react_native/e2e/android/app/libs + displayName: Copy Android package to Android e2e test directory + + - script: | + yarn global add detox-cli + echo "Path: $PATH" + echo "##vso[task.prependpath]$(yarn global bin)" + echo "Updated PATH: $PATH" + echo "Detox bin directory: $(yarn global bin)" + ls $(yarn global bin) + displayName: Install detox cli tools and prepend to PATH + + - script: | + detox build --configuration android.emu.release + workingDirectory: '$(Build.SourcesDirectory)/js/react_native/e2e' + displayName: Build React Native Detox Android e2e Tests + + # + # Unit tests and E2E tests with Android emulator + # + - 
template: ../../templates/use-android-emulator.yml + parameters: + create: true + start: true + + - template: ../../templates/android-dump-logs-from-steps.yml + parameters: + steps: + - task: Gradle@3 + inputs: + gradleWrapperFile: '$(Build.SourcesDirectory)/js/react_native/android/gradlew' + workingDirectory: '$(Build.SourcesDirectory)/js/react_native/android' + options: '--stacktrace' + tasks: 'connectedDebugAndroidTest' + publishJUnitResults: true + testResultsFiles: '**/TEST-*.xml' + testRunTitle: 'React Native Android Instrumented Test results' + sonarQubeRunAnalysis: false + spotBugsAnalysis: false + displayName: Run React Native Android Instrumented Tests + + - script: | + JEST_JUNIT_OUTPUT_FILE=$(Build.SourcesDirectory)/js/react_native/e2e/android-test-results.xml \ + detox test --record-logs all \ + --configuration android.emu.release \ + --loglevel trace \ + --take-screenshots failing + workingDirectory: '$(Build.SourcesDirectory)/js/react_native/e2e' + displayName: Run React Native Detox Android e2e Tests + + - template: ../../templates/use-android-emulator.yml + parameters: + stop: true + + - task: PublishTestResults@2 + inputs: + testResultsFiles: '$(Build.SourcesDirectory)/js/react_native/e2e/android-test-results.xml' + failTaskOnFailedTests: true + testRunTitle: 'React Native Detox Android e2e Test Results' + condition: succeededOrFailed() + displayName: Publish React Native Detox Android e2e Test Results + + - script: | + git restore . + workingDirectory: '$(Build.SourcesDirectory)/js' + displayName: Restore git changes + + - task: PowerShell@2 + inputs: + filePath: '$(Build.SourcesDirectory)/tools/ci_build/github/js/pack-npm-packages.ps1' + arguments: '"${{parameters.NpmPackagingMode}}" $(Build.SourcesDirectory) react_native' + workingDirectory: '$(Build.SourcesDirectory)' + errorActionPreference: stop + displayName: Pack NPM packages + + - task: CopyFiles@2 + inputs: + sourceFolder: $(Build.SourcesDirectory)/js/common + contents: onnxruntime-common*.tgz + targetFolder: $(Build.ArtifactStagingDirectory) + displayName: 'Create Artifacts onnxruntime-common' + + - task: CopyFiles@2 + inputs: + sourceFolder: $(Build.SourcesDirectory)/js/react_native + contents: onnxruntime-react-native*.tgz + targetFolder: $(Build.ArtifactStagingDirectory) + displayName: Create Artifacts onnxruntime-react-native + + - task: PublishPipelineArtifact@1 + inputs: + artifact: android_e2e_test_logs_$(Build.BuildId)_$(Build.BuildNumber)_$(System.JobAttempt) + targetPath: '$(Build.SourcesDirectory)/js/react_native/e2e/artifacts' + condition: succeededOrFailed() + displayName: Publish React Native Detox E2E test logs + + - task: PublishPipelineArtifact@0 + inputs: + artifactName: '${{parameters.PackageName}}' + targetPath: '$(Build.ArtifactStagingDirectory)' + displayName: Publish Pipeline Artifact + + - template: ../../templates/explicitly-defined-final-tasks.yml \ No newline at end of file diff --git a/tools/ci_build/github/azure-pipelines/stages/nuget-win-cuda-packaging-stage.yml b/tools/ci_build/github/azure-pipelines/stages/nuget-win-cuda-packaging-stage.yml index 75da33722e2bf..095ac9e708ed0 100644 --- a/tools/ci_build/github/azure-pipelines/stages/nuget-win-cuda-packaging-stage.yml +++ b/tools/ci_build/github/azure-pipelines/stages/nuget-win-cuda-packaging-stage.yml @@ -50,7 +50,7 @@ stages: msbuildPlatform: x64 packageName: x64-cuda CudaVersion: ${{ parameters.CudaVersion }} - buildparameter: --use_cuda --cuda_home=${{ parameters.win_cuda_home }} --enable_onnx_tests --enable_wcos 
--cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=60;61;70;75;80" + buildparameter: --use_cuda --cuda_home=${{ parameters.win_cuda_home }} --enable_onnx_tests --enable_wcos --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=75;80;90" runTests: ${{ parameters.RunOnnxRuntimeTests }} buildJava: ${{ parameters.buildJava }} java_artifact_id: onnxruntime_gpu @@ -68,7 +68,7 @@ stages: msbuildPlatform: x64 CudaVersion: ${{ parameters.CudaVersion }} packageName: x64-tensorrt - buildparameter: --use_tensorrt --tensorrt_home=${{ parameters.win_trt_home }} --cuda_home=${{ parameters.win_cuda_home }} --enable_onnx_tests --enable_wcos --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=60;61;70;75;80" + buildparameter: --use_tensorrt --tensorrt_home=${{ parameters.win_trt_home }} --cuda_home=${{ parameters.win_cuda_home }} --enable_onnx_tests --enable_wcos --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=75;80;90" runTests: ${{ parameters.RunOnnxRuntimeTests }} buildJava: ${{ parameters.buildJava }} java_artifact_id: onnxruntime_gpu diff --git a/tools/ci_build/github/azure-pipelines/stages/py-cpu-packaging-stage.yml b/tools/ci_build/github/azure-pipelines/stages/py-cpu-packaging-stage.yml index 665a11c64e873..c93194240564a 100644 --- a/tools/ci_build/github/azure-pipelines/stages/py-cpu-packaging-stage.yml +++ b/tools/ci_build/github/azure-pipelines/stages/py-cpu-packaging-stage.yml @@ -59,7 +59,7 @@ parameters: - name: qnn_sdk_version type: string displayName: 'QNN SDK version. Only for QNN packages.' - default: 2.28.2.241116 + default: 2.30.0.250109 stages: - ${{ if eq(parameters.enable_windows_cpu, true) }}: diff --git a/tools/ci_build/github/azure-pipelines/stages/py-gpu-packaging-stage.yml b/tools/ci_build/github/azure-pipelines/stages/py-gpu-packaging-stage.yml index 9b8a4cbc1c4bf..f48573abd3dba 100644 --- a/tools/ci_build/github/azure-pipelines/stages/py-gpu-packaging-stage.yml +++ b/tools/ci_build/github/azure-pipelines/stages/py-gpu-packaging-stage.yml @@ -56,7 +56,7 @@ stages: PYTHON_VERSION: ${{ python_version }} EP_NAME: gpu CudaVersion: ${{ parameters.cuda_version }} - EP_BUILD_FLAGS: --enable_lto --cuda_home=$(Agent.TempDirectory)\v${{ parameters.cuda_version }} --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=52;60;61;70;75;80" + EP_BUILD_FLAGS: --enable_lto --cuda_home=$(Agent.TempDirectory)\v${{ parameters.cuda_version }} --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=75;80;90" use_tensorrt: True - ${{ if eq(parameters.enable_linux_cuda, true) }}: @@ -68,9 +68,9 @@ stages: cmake_build_type: ${{ parameters.cmake_build_type }} cuda_version: ${{ parameters.cuda_version }} ${{ if eq(parameters.cuda_version, '11.8') }}: - docker_base_image: onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda11_x64_almalinux8_gcc11:20250108.1 + docker_base_image: onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda11_x64_almalinux8_gcc11:20250124.1 ${{ if eq(parameters.cuda_version, '12.2') }}: - docker_base_image: onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda12_x64_ubi8_gcc12:20250108.1 + docker_base_image: onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda12_x64_ubi8_gcc12:20250124.1 - ${{ if eq(parameters.enable_windows_dml, true) }}: - ${{ each python_version in parameters.PythonVersions }}: diff --git a/tools/ci_build/github/azure-pipelines/templates/android-binary-size-check-stage.yml b/tools/ci_build/github/azure-pipelines/templates/android-binary-size-check-stage.yml index 4d9606d82ced2..3cccd3aee15cd 100644 --- 
a/tools/ci_build/github/azure-pipelines/templates/android-binary-size-check-stage.yml +++ b/tools/ci_build/github/azure-pipelines/templates/android-binary-size-check-stage.yml @@ -38,13 +38,12 @@ stages: submodules: none - template: use-android-ndk.yml - #TODO: use a different docker file since this job doesn't need to rely on manylinux - template: get-docker-image-steps.yml parameters: - Dockerfile: tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_cpu - Context: tools/ci_build/github/linux/docker + Dockerfile: tools/ci_build/github/linux/docker/inference/x86_64/default/cpu/Dockerfile + Context: tools/ci_build/github/linux/docker/inference/x86_64/default/cpu DockerBuildArgs: "--build-arg BUILD_UID=$( id -u )" - Repository: onnxruntimecpubuild + Repository: onnxruntimecpubuildcentos8x64_packaging - task: PythonScript@0 displayName: 'Set variables from config file "${{ parameters.BuildConfigFile }}"' @@ -83,6 +82,7 @@ stages: --volume $(Build.BinariesDirectory):/build \ --volume $ANDROID_HOME:/android_home \ --volume $NDK_HOME:/ndk_home \ + -w /onnxruntime_src \ -e ALLOW_RELEASED_ONNX_OPSET_ONLY=1 \ -e NIGHTLY_BUILD \ -e BUILD_BUILDNUMBER \ @@ -90,11 +90,10 @@ stages: -e BUILD_ID=$(Build.BuildId) \ -e BUILD_REASON=$(Build.Reason) \ -e BUILD_BRANCH=$(Build.SourceBranch) \ - onnxruntimecpubuild \ - /opt/python/cp310-cp310/bin/python3 /onnxruntime_src/tools/ci_build/github/linux/ort_minimal/build_ort_and_check_binary_size.py \ + onnxruntimecpubuildcentos8x64_packaging \ + bash -c "python3 -m pip install -r tools/ci_build/requirements/pybind/requirements.txt && python3 tools/ci_build/github/linux/ort_minimal/build_ort_and_check_binary_size.py \ --build_dir /build/1a \ - ${BINARY_SIZE_THRESHOLD_ARGS} \ - "/onnxruntime_src/${{ parameters.BuildConfigFile }}" + ${BINARY_SIZE_THRESHOLD_ARGS} ${{ parameters.BuildConfigFile }} " workingDirectory: $(Build.SourcesDirectory) - task: AzureCLI@2 @@ -139,6 +138,7 @@ stages: --volume $(Build.BinariesDirectory):/build \ --volume $ANDROID_HOME:/android_home \ --volume $NDK_HOME:/ndk_home \ + -w /onnxruntime_src \ -e ALLOW_RELEASED_ONNX_OPSET_ONLY=1 \ -e NIGHTLY_BUILD \ -e BUILD_BUILDNUMBER \ @@ -146,11 +146,10 @@ stages: -e BUILD_ID=$(Build.BuildId) \ -e BUILD_REASON=$(Build.Reason) \ -e BUILD_BRANCH=$(Build.SourceBranch) \ - onnxruntimecpubuild \ - /opt/python/cp310-cp310/bin/python3 /onnxruntime_src/tools/ci_build/github/linux/ort_minimal/build_ort_and_check_binary_size.py \ + onnxruntimecpubuildcentos8x64_packaging \ + bash -c "python3 -m pip install -r tools/ci_build/requirements/pybind/requirements.txt && python3 tools/ci_build/github/linux/ort_minimal/build_ort_and_check_binary_size.py \ --build_dir /build/1b \ - --with_debug_info \ - "/onnxruntime_src/${{ parameters.BuildConfigFile }}" + --with_debug_info ${{ parameters.BuildConfigFile }}" workingDirectory: $(Build.SourcesDirectory) - task: PublishPipelineArtifact@1 diff --git a/tools/ci_build/github/azure-pipelines/templates/android-java-api-aar-test.yml b/tools/ci_build/github/azure-pipelines/templates/android-java-api-aar-test.yml index 7b378f5c8c474..ede9ec1a086ca 100644 --- a/tools/ci_build/github/azure-pipelines/templates/android-java-api-aar-test.yml +++ b/tools/ci_build/github/azure-pipelines/templates/android-java-api-aar-test.yml @@ -4,11 +4,6 @@ parameters: type: string default: '' -- name: job_name_suffix - displayName: job name - type: string - default: '' - - name: packageName displayName: Package Name type: string @@ -22,20 +17,16 @@ parameters: - name: QnnSDKVersion 
displayName: QNN SDK Version type: string - default: '2.28.0.241029' + default: '2.30.0.250109' jobs: -- job: Final_AAR_Testing_Android_${{ parameters.job_name_suffix }} +- job: Final_AAR_Testing_Android + pool: 'onnxruntime-Ubuntu2204-AMD-CPU' workspace: clean: all - pool: - vmImage: 'macOS-13' variables: - - name: runCodesignValidationInjection - value: false + runCodesignValidationInjection: false timeoutInMinutes: 90 - dependsOn: - - Android_Java_API_AAR_Packaging_${{ parameters.job_name_suffix }} steps: - template: set-version-number-variables-step.yml diff --git a/tools/ci_build/github/azure-pipelines/templates/android-java-api-aar.yml b/tools/ci_build/github/azure-pipelines/templates/android-java-api-aar.yml index c38736edd58f1..c32b09ac8c302 100644 --- a/tools/ci_build/github/azure-pipelines/templates/android-java-api-aar.yml +++ b/tools/ci_build/github/azure-pipelines/templates/android-java-api-aar.yml @@ -51,7 +51,7 @@ parameters: - name: QnnSDKVersion displayName: QNN SDK Version type: string - default: '2.28.0.241029' + default: '2.30.0.250109' jobs: - job: Android_Java_API_AAR_Packaging_${{ parameters.job_name_suffix }} @@ -76,13 +76,12 @@ jobs: mkdir -p $(artifacts_directory) workingDirectory: $(Build.BinariesDirectory) - #TODO: use a different docker file since this job doesn't need to rely on manylinux - template: get-docker-image-steps.yml parameters: - Dockerfile: tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_cpu - Context: tools/ci_build/github/linux/docker + Dockerfile: tools/ci_build/github/linux/docker/inference/x86_64/default/cpu/Dockerfile + Context: tools/ci_build/github/linux/docker/inference/x86_64/default/cpu DockerBuildArgs: "--build-arg BUILD_UID=$( id -u )" - Repository: onnxruntimecpubuild + Repository: onnxruntimecpubuildcentos8x64_packaging - template: set-version-number-variables-step.yml @@ -127,7 +126,7 @@ jobs: -e PUBLISH_EXECUTABLES=${{parameters.publish_executables}} \ -e PACKAGE_NAME=${{parameters.packageName}} \ -e RELEASE_VERSION_SUFFIX=${{parameters.ReleaseVersionSuffix}} \ - onnxruntimecpubuild \ + onnxruntimecpubuildcentos8x64_packaging \ /bin/bash /onnxruntime_src/tools/ci_build/github/android/build_aar_and_copy_artifacts.sh $USE_QNN workingDirectory: $(Build.SourcesDirectory) diff --git a/tools/ci_build/github/azure-pipelines/templates/c-api-cpu.yml b/tools/ci_build/github/azure-pipelines/templates/c-api-cpu.yml index f5350483ff144..1ab4fd2a8e9e7 100644 --- a/tools/ci_build/github/azure-pipelines/templates/c-api-cpu.yml +++ b/tools/ci_build/github/azure-pipelines/templates/c-api-cpu.yml @@ -51,7 +51,7 @@ parameters: - name: QnnSDKVersion displayName: QNN SDK Version type: string - default: 2.28.0.241029 + default: 2.30.0.250109 stages: - template: linux-cpu-packaging-pipeline.yml @@ -82,10 +82,12 @@ stages: packageName: 'onnxruntime-android' ReleaseVersionSuffix: $(ReleaseVersionSuffix) +- stage: Android_Java_API_AAR_Testing_Full + dependsOn: Android_Java_API_AAR_Packaging_Full + jobs: - template: android-java-api-aar-test.yml parameters: artifactName: 'onnxruntime-android-full-aar' - job_name_suffix: 'Full' ReleaseVersionSuffix: $(ReleaseVersionSuffix) - stage: Android_Java_API_AAR_Packaging_QNN @@ -105,10 +107,12 @@ stages: ReleaseVersionSuffix: $(ReleaseVersionSuffix) QnnSDKVersion: ${{ parameters.QnnSDKVersion }} +- stage: Final_AAR_Testing_Android_QNN + dependsOn: Android_Java_API_AAR_Packaging_QNN + jobs: - template: android-java-api-aar-test.yml parameters: artifactName: 'onnxruntime-android-qnn-aar' - job_name_suffix: 
'QNN' packageName: 'onnxruntime-android-qnn' QnnSDKVersion: ${{ parameters.QnnSDKVersion }} diff --git a/tools/ci_build/github/azure-pipelines/templates/c-api-linux-cpu.yml b/tools/ci_build/github/azure-pipelines/templates/c-api-linux-cpu.yml index 30b9e93594b55..d4bc54273a764 100644 --- a/tools/ci_build/github/azure-pipelines/templates/c-api-linux-cpu.yml +++ b/tools/ci_build/github/azure-pipelines/templates/c-api-linux-cpu.yml @@ -65,7 +65,7 @@ jobs: set -e -x mkdir -p $HOME/.onnx docker run --rm --volume /data/onnx:/data/onnx:ro --volume $(Build.SourcesDirectory):/onnxruntime_src --volume $(Build.BinariesDirectory):/build \ - --volume $HOME/.onnx:/home/onnxruntimedev/.onnx -e NIGHTLY_BUILD onnxruntimecpubuildcentos8${{parameters.OnnxruntimeArch}}_packaging /bin/bash -c "python3.12 \ + --volume $HOME/.onnx:/home/onnxruntimedev/.onnx -e NIGHTLY_BUILD onnxruntimecpubuildcentos8${{parameters.OnnxruntimeArch}}_packaging /bin/bash -c "python3 \ /onnxruntime_src/tools/ci_build/build.py --enable_lto --build_java --build_nodejs --build_dir /build --config Release \ --skip_submodule_sync --parallel --use_binskim_compliant_compile_flags --build_shared_lib ${{ parameters.AdditionalBuildFlags }} && cd /build/Release && make install DESTDIR=/build/installed" workingDirectory: $(Build.SourcesDirectory) diff --git a/tools/ci_build/github/azure-pipelines/templates/download-deps.yml b/tools/ci_build/github/azure-pipelines/templates/download-deps.yml index 344aaa4aaf19a..fe3bc60c83dea 100644 --- a/tools/ci_build/github/azure-pipelines/templates/download-deps.yml +++ b/tools/ci_build/github/azure-pipelines/templates/download-deps.yml @@ -11,7 +11,7 @@ steps: packageType: upack feed: '/7424c8e4-5c62-490e-95c4-79446f31017c' definition: '517c4f6f-5437-4392-a70d-4f15ec5be2f0' - version: 1.0.203 + version: 1.0.213 downloadPath: $(Build.BinariesDirectory)/deps # The private ADO project @@ -22,7 +22,7 @@ steps: packageType: upack feed: '/4c7631f5-24c0-4307-8822-1aa8f180c325' definition: 'fd9dd5ad-b73e-4678-890e-edcf680dbc1a' - version: 1.0.203 + version: 1.0.213 downloadPath: $(Build.BinariesDirectory)/deps # You can add more ADO accounts at here. 
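The download_win_qnn_sdk.yml hunk just below depends on an Azure DevOps logging command to hand the SDK path to later steps; a minimal sketch of that handshake (the dir step is illustrative and not part of the diff):

  - powershell: |
      # Writing this marker to stdout sets a pipeline variable for all subsequent steps.
      echo "##vso[task.setvariable variable=QnnSDKRootDir]$(Agent.TempDirectory)\qnn-v2.30.0.250109"
    displayName: Set QnnSDKRootDir
  - task: CmdLine@2
    inputs:
      # Later steps read the variable back with $() macro syntax.
      script: dir $(QnnSDKRootDir)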
diff --git a/tools/ci_build/github/azure-pipelines/templates/jobs/download_linux_qnn_sdk.yml b/tools/ci_build/github/azure-pipelines/templates/jobs/download_linux_qnn_sdk.yml index 179a846509cc1..3596799ba236c 100644 --- a/tools/ci_build/github/azure-pipelines/templates/jobs/download_linux_qnn_sdk.yml +++ b/tools/ci_build/github/azure-pipelines/templates/jobs/download_linux_qnn_sdk.yml @@ -1,7 +1,7 @@ parameters: - name: QnnSDKVersion type: string - default: '2.28.2.241116' + default: '2.30.0.250109' steps: - script: | diff --git a/tools/ci_build/github/azure-pipelines/templates/jobs/download_win_qnn_sdk.yml b/tools/ci_build/github/azure-pipelines/templates/jobs/download_win_qnn_sdk.yml index 9df8b249f681e..922e945f15524 100644 --- a/tools/ci_build/github/azure-pipelines/templates/jobs/download_win_qnn_sdk.yml +++ b/tools/ci_build/github/azure-pipelines/templates/jobs/download_win_qnn_sdk.yml @@ -1,15 +1,15 @@ parameters: - name: QnnSDKVersion type: string - default: '2.28.2.241116' + default: '2.30.0.250109' steps: - powershell: | - azcopy.exe cp --recursive https://lotusscus.blob.core.windows.net/models/qnnsdk/qnn-v${{ parameters.QnnSDKVersion }}_win $(Agent.TempDirectory) + azcopy.exe cp --recursive https://lotusscus.blob.core.windows.net/models/qnnsdk/qnn-v${{ parameters.QnnSDKVersion }} $(Agent.TempDirectory) displayName: 'Download QNN SDK v${{ parameters.QnnSDKVersion }}' - powershell: | - echo "##vso[task.setvariable variable=QnnSDKRootDir]$(Agent.TempDirectory)\qnn-v${{ parameters.QnnSDKVersion }}_win" + echo "##vso[task.setvariable variable=QnnSDKRootDir]$(Agent.TempDirectory)\qnn-v${{ parameters.QnnSDKVersion }}" displayName: Set QnnSDKRootDir - task: CmdLine@2 diff --git a/tools/ci_build/github/azure-pipelines/templates/linux-wasm-ci.yml b/tools/ci_build/github/azure-pipelines/templates/linux-wasm-ci.yml index 3ab1cd45ff5f7..1df740798750d 100644 --- a/tools/ci_build/github/azure-pipelines/templates/linux-wasm-ci.yml +++ b/tools/ci_build/github/azure-pipelines/templates/linux-wasm-ci.yml @@ -26,6 +26,10 @@ parameters: type: boolean default: false +- name: BuildWebGPU + type: boolean + default: false + # In fact, it's only used on Linux for compiler cache. - name: BuildStaticLib type: boolean @@ -122,6 +126,24 @@ jobs: DisplayName: 'Build (simd + threads + JSEP)' WithCache: ${{ parameters.WithCache }} + - ${{ if eq(parameters.BuildWebGPU, true) }}: + # This step only verifies that the build succeeds. + # Currently we use EMSDK 3.1.59, which is not compatible with Dawn's changes in its Emscripten fork, so the Closure compiler will not work for the WebGPU build. + # Only enabled in Debug builds. + # + # TODO: when upgrading to a newer Emscripten version, we should fix this step.
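Concretely, the conditional step added below runs the standard WASM build with WebGPU enabled but stops after compilation; assembled from the template arguments, it amounts to roughly this (CommonBuildArgs abbreviated):

  - script: |
      python3 tools/ci_build/build.py $(CommonBuildArgs) \
        --build_dir $(Build.BinariesDirectory)/wasm_inferencing_webgpu \
        --use_webgpu --target onnxruntime_webassembly --skip_tests
    displayName: 'Build (simd + threads + WebGPU experimental)'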
+ - template: build-linux-wasm-step.yml + parameters: + Today: $(Today) + ${{ if eq(parameters.BuildStaticLib, true)}}: + AdditionalKey: wasm_inferencing_webgpu_exp | ${{ parameters.BuildConfig }} | static + ${{ else }}: + AdditionalKey: wasm_inferencing_webgpu_exp | ${{ parameters.BuildConfig }} + CacheDir: $(ORT_CACHE_DIR)/wasm_inferencing_webgpu + Arguments: '$(CommonBuildArgs) --build_dir $(Build.BinariesDirectory)/wasm_inferencing_webgpu --use_webgpu --target onnxruntime_webassembly --skip_tests' + DisplayName: 'Build (simd + threads + WebGPU experimental)' + WithCache: ${{ parameters.WithCache }} + - ${{ if eq(parameters.SkipPublish, false) }}: - script: | cp $(Build.BinariesDirectory)/wasm_inferencing/${{ parameters.BuildConfig }}/ort-wasm-simd-threaded.wasm $(Build.ArtifactStagingDirectory) diff --git a/tools/ci_build/github/azure-pipelines/templates/ondevice-training-cpu-packaging-pipeline.yml b/tools/ci_build/github/azure-pipelines/templates/ondevice-training-cpu-packaging-pipeline.yml index 8bbe8f82530ea..523f3ab58b982 100644 --- a/tools/ci_build/github/azure-pipelines/templates/ondevice-training-cpu-packaging-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/templates/ondevice-training-cpu-packaging-pipeline.yml @@ -102,10 +102,12 @@ stages: packageName: onnxruntime-training-android enable_code_sign: true +- stage: Final_AAR_Testing_Android_Training_Full + dependsOn: Android_Java_API_AAR_Packaging_Training_Full + jobs: - template: android-java-api-aar-test.yml parameters: artifactName: 'onnxruntime-training-android-full-aar' - job_name_suffix: 'Training_Full' packageName: onnxruntime-training-android - stage: NuGet_Packaging_Training_CPU @@ -115,7 +117,7 @@ stages: - Windows_Packaging_Training_CPU_x86_${{ parameters.BuildVariant }} - Windows_Packaging_Training_CPU_x64_${{ parameters.BuildVariant }} - Windows_Packaging_Training_CPU_arm64_${{ parameters.BuildVariant }} - - Android_Java_API_AAR_Packaging_Training_Full + - Final_AAR_Testing_Android_Training_Full condition: succeeded() jobs: - job: NuGet_Packaging_Training_CPU diff --git a/tools/ci_build/github/azure-pipelines/templates/py-linux-qnn.yml b/tools/ci_build/github/azure-pipelines/templates/py-linux-qnn.yml index 73cd79f75a63b..43a7a4ec35767 100644 --- a/tools/ci_build/github/azure-pipelines/templates/py-linux-qnn.yml +++ b/tools/ci_build/github/azure-pipelines/templates/py-linux-qnn.yml @@ -26,7 +26,7 @@ parameters: - name: QnnSdk displayName: QNN SDK version type: string - default: 2.28.2.241116 + default: 2.30.0.250109 jobs: - job: Linux_py_qnn_Wheels_x64 diff --git a/tools/ci_build/github/azure-pipelines/templates/py-packaging-selectable-stage.yml b/tools/ci_build/github/azure-pipelines/templates/py-packaging-selectable-stage.yml deleted file mode 100644 index 2485bd0e8e5e5..0000000000000 --- a/tools/ci_build/github/azure-pipelines/templates/py-packaging-selectable-stage.yml +++ /dev/null @@ -1,529 +0,0 @@ -parameters: -- name: enable_linux_cpu - displayName: 'Whether Linux CPU package is built.' - type: boolean - default: true - -- name: enable_windows_cpu - displayName: 'Whether Windows CPU package is built.' - type: boolean - default: true - -- name: enable_linux_gpu - displayName: 'Whether Linux GPU package is built.' - type: boolean - default: true - -- name: enable_windows_gpu - displayName: 'Whether Windows GPU package is built.' 
- type: boolean - default: true - -- name: python_version - displayName: 'Python version used for build' - type: object - default: ["'3.8'"] - -- name: cpu_build_py_parameters - displayName: 'Extra parameters to pass to build.py for CPU package.' - type: string - default: '--use_openvino CPU' - -- name: gpu_build_py_parameters - displayName: 'Extra parameters to pass to build.py for GPU package.' - type: string - default: ' ' - -- name: ubuntu_version_linux_cpu - displayName: 'Ubuntu Version for Linux CPU package.' - type: string - default: '20.04' - -stages: -- stage: Python_Packaging - - jobs: - - ${{ if eq(parameters.enable_linux_cpu, true) }}: - - job: Linux_CPU_py_Wheels - timeoutInMinutes: 90 - workspace: - clean: all - pool: onnxruntime-Ubuntu2204-AMD-CPU - strategy: - matrix: - ${{ each PythonVersion in parameters.python_version }}: - 'Python${{ PythonVersion }}': - PythonVersion: ${{ PythonVersion }} - variables: - UbuntuVersion: ${{ parameters.ubuntu_version_linux_cpu }} - steps: - - checkout: self - clean: true - submodules: recursive - - - template: get-docker-image-steps.yml - parameters: - Dockerfile: tools/ci_build/github/linux/docker/Dockerfile.ubuntu_openvino - Context: tools/ci_build/github/linux/docker - DockerBuildArgs: "--build-arg PYTHON_VERSION=$(PythonVersion) --build-arg UBUNTU_VERSION=$(UbuntuVersion)" - Repository: onnxruntimeubuntupython$(PythonVersion)cpubuild - - - task: CmdLine@2 - displayName: 'Build Python Wheel' - inputs: - script: | - mkdir -p $HOME/.onnx - docker run --rm \ - --volume /data/onnx:/data/onnx:ro \ - --volume $(Build.SourcesDirectory):/onnxruntime_src \ - --volume $(Build.BinariesDirectory):/build \ - --volume /data/models:/build/models:ro \ - --volume $HOME/.onnx:/home/onnxruntimedev/.onnx \ - -e BUILD_BUILDNUMBER \ - onnxruntimeubuntupython$(PythonVersion)cpubuild \ - python3 /onnxruntime_src/tools/ci_build/build.py \ - --build_dir /build \ - --config Release --update --build \ - --skip_submodule_sync \ - --parallel \ - --enable_lto \ - --build_wheel \ - --enable_onnx_tests \ - --test \ - --ctest_path '' \ - ${{ parameters.cpu_build_py_parameters }} - workingDirectory: $(Build.SourcesDirectory) - - - task: CopyFiles@2 - displayName: 'Copy Python Wheel to: $(Build.ArtifactStagingDirectory)' - inputs: - SourceFolder: '$(Build.BinariesDirectory)/Release/dist' - Contents: '*.whl' - TargetFolder: '$(Build.ArtifactStagingDirectory)' - - - task: PublishBuildArtifacts@1 - displayName: 'Publish Artifact: ONNXRuntime python wheel and documentation' - inputs: - ArtifactName: onnxruntime - - - template: component-governance-component-detection-steps.yml - parameters: - condition: 'succeeded' - - - template: clean-agent-build-directory-step.yml - - - ${{ if eq(parameters.enable_windows_cpu, true) }}: - - job: Windows_CPU_py_Wheels - pool: 'onnxruntime-cpu-openvino-winbuild' - strategy: - matrix: - ${{ each PythonVersion in parameters.python_version }}: - 'Python${{ PythonVersion }}': - PythonVersion: ${{ PythonVersion }} - variables: - OnnxRuntimeBuildDirectory: '$(Build.BinariesDirectory)' - EnvSetupScript: setup_env.bat - setVcvars: true - BuildConfig: 'RelWithDebInfo' - timeoutInMinutes: 120 - workspace: - clean: all - - steps: - - checkout: self - clean: true - submodules: recursive - - - task: UsePythonVersion@0 - inputs: - versionSpec: $(PythonVersion) - addToPath: true - - - task: BatchScript@1 - displayName: 'setup env' - inputs: - filename: '$(Build.SourcesDirectory)\tools\ci_build\github\windows\$(EnvSetupScript)' - modifyEnvironment: true - 
workingFolder: '$(Build.BinariesDirectory)' - - - task: BatchScript@1 - displayName: 'setup OpenVino env' - inputs: - filename: 'C:\Program Files\Intel\openvino_2021.4.752\bin\setupvars.bat' - modifyEnvironment: true - - - task: PowerShell@2 - displayName: 'Install ONNX' - inputs: - filePath: '$(Build.SourcesDirectory)/tools/ci_build/github/windows/install_third_party_deps.ps1' - workingDirectory: '$(Build.BinariesDirectory)' - arguments: -cpu_arch x64 -install_prefix $(Build.BinariesDirectory)\$(BuildConfig)\installed -build_config $(BuildConfig) - - - task: PythonScript@0 - displayName: 'Generate cmake config' - inputs: - scriptPath: '$(Build.SourcesDirectory)\tools\ci_build\build.py' - arguments: > - --config $(BuildConfig) - --enable_lto - --build_dir $(Build.BinariesDirectory) - --skip_submodule_sync - --cmake_generator "Visual Studio 17 2022" - --enable_pybind - --enable_onnx_tests - ${{ parameters.cpu_build_py_parameters }} - --parallel --update - workingDirectory: '$(Build.BinariesDirectory)' - - - task: VSBuild@1 - displayName: 'Build' - inputs: - solution: '$(Build.BinariesDirectory)\$(BuildConfig)\onnxruntime.sln' - platform: x64 - configuration: $(BuildConfig) - msbuildArchitecture: x64 - maximumCpuCount: true - logProjectEvents: true - workingFolder: '$(Build.BinariesDirectory)\$(BuildConfig)' - createLogFile: true - - # Esrp signing - - template: win-esrp-dll.yml - parameters: - FolderPath: '$(Build.BinariesDirectory)\$(BuildConfig)\$(BuildConfig)\onnxruntime\capi' - DisplayName: 'ESRP - Sign Native dlls' - DoEsrp: true - Pattern: '*.pyd,*.dll' - - - task: PythonScript@0 - displayName: 'Build wheel' - inputs: - scriptPath: '$(Build.SourcesDirectory)\setup.py' - arguments: 'bdist_wheel' - workingDirectory: '$(Build.BinariesDirectory)\$(BuildConfig)\$(BuildConfig)' - - - task: CopyFiles@2 - displayName: 'Copy Python Wheel to: $(Build.ArtifactStagingDirectory)' - inputs: - SourceFolder: '$(Build.BinariesDirectory)\$(BuildConfig)\$(BuildConfig)\dist' - Contents: '*.whl' - TargetFolder: '$(Build.ArtifactStagingDirectory)' - - - task: PublishBuildArtifacts@1 - displayName: 'Publish Artifact: ONNXRuntime python wheel' - inputs: - ArtifactName: onnxruntime - - - script: | - 7z x *.whl - workingDirectory: '$(Build.ArtifactStagingDirectory)' - displayName: 'unzip the package' - - task: CredScan@3 - displayName: 'Run CredScan' - inputs: - debugMode: false - continueOnError: true - - - task: BinSkim@4 - displayName: 'Run BinSkim' - inputs: - AnalyzeTargetGlob: '+:file|$(Build.ArtifactStagingDirectory)\**\*.dll;-:file|$(Build.ArtifactStagingDirectory)\**\DirectML.dll' - continueOnError: true - - - task: DeleteFiles@1 - displayName: 'Delete files from $(Build.BinariesDirectory)\$(BuildConfig)' - condition: and (succeeded(), eq(variables['PythonVersion'], '3.8')) - inputs: - SourceFolder: '$(Build.BinariesDirectory)\$(BuildConfig)' - Contents: | - **/*.obj - **/*.pdb - **/*.dll - - - powershell: | - python -m pip uninstall -y onnxruntime onnxruntime-gpu -qq - Get-ChildItem -Path $(Build.ArtifactStagingDirectory)/*.whl | foreach {pip --disable-pip-version-check install --force-reinstall --upgrade $_.fullname tabulate} - python -m pip install protobuf==3.18.1 - Remove-Item -Recurse -Force onnxruntime - python onnx_backend_test_series.py - workingDirectory: '$(Build.BinariesDirectory)\$(BuildConfig)\$(BuildConfig)' - displayName: 'Run Python Tests' - - #Skip it for 32 bits x86 build. Currently the scan tool has a bug: it doesn't allow me use 64 bits link.exe - #in 32 bits Win32 build. 
I tried all the settings but they all don't work. - - task: SDLNativeRules@3 - displayName: 'Run the PREfast SDL Native Rules for MSBuild' - condition: and (succeeded(), eq(variables['PythonVersion'], '3.8')) - inputs: - msBuildArchitecture: amd64 - setupCommandlines: 'python $(Build.SourcesDirectory)\tools\ci_build\build.py --config Debug --build_dir $(Build.BinariesDirectory) --skip_submodule_sync --cmake_generator "Visual Studio 17 2022" --enable_pybind --enable_onnx_tests --parallel --update --cmake_extra_defines onnxruntime_ENABLE_STATIC_ANALYSIS=ON' - msBuildCommandline: '"C:\Program Files\Microsoft Visual Studio\2022\Enterprise\MSBuild\Current\Bin\amd64\msbuild.exe" "$(Build.BinariesDirectory)\Debug\onnxruntime.sln" /p:platform="x64" /p:configuration=Debug /p:VisualStudioVersion="17.0" /m /p:PreferredToolArchitecture=x64' - excludedPaths: '$(Build.BinariesDirectory)#$(Build.SourcesDirectory)\cmake#C:\program files (x86)' - - - task: TSAUpload@2 - displayName: 'TSA upload' - condition: and(and (succeeded(), eq(variables['PythonVersion'], '3.8')), eq(variables['Build.SourceBranch'], 'refs/heads/main')) - inputs: - GdnPublishTsaOnboard: false - GdnPublishTsaConfigFile: '$(Build.sourcesDirectory)\.gdn\.gdntsa' - continueOnError: true - - - template: component-governance-component-detection-steps.yml - parameters: - condition: 'succeeded' - - - - ${{ if eq(parameters.enable_linux_gpu, true) }}: - - job: Linux_py_GPU_Wheels - timeoutInMinutes: 300 - workspace: - clean: all - pool: Onnxruntime-Linux-GPU - strategy: - matrix: - ${{ each PythonVersion in parameters.python_version }}: - 'Python${{ PythonVersion }}': - PythonVersion: ${{ PythonVersion }} - steps: - - checkout: self - clean: true - submodules: recursive - - - template: set-python-manylinux-variables-step.yml - - - template: get-docker-image-steps.yml - parameters: - Dockerfile: tools/ci_build/github/linux/docker/Dockerfile.manylinux2014_cuda11_8_tensorrt8_6 - Context: tools/ci_build/github/linux/docker - DockerBuildArgs: "--network=host --build-arg POLICY=manylinux2014 --build-arg PLATFORM=x86_64 --build-arg DEVTOOLSET_ROOTPATH=/opt/rh/devtoolset-11/root --build-arg PREPEND_PATH=/opt/rh/devtoolset-11/root/usr/bin: --build-arg LD_LIBRARY_PATH_ARG=/opt/rh/devtoolset-11/root/usr/lib64:/opt/rh/devtoolset-11/root/usr/lib:/opt/rh/devtoolset-11/root/usr/lib64/dyninst:/opt/rh/devtoolset-11/root/usr/lib/dyninst:/usr/local/lib64 --build-arg BUILD_UID=$( id -u )" - Repository: onnxruntimecuda118xtrt86build - - - task: CmdLine@2 - displayName: 'Build Python Wheel' - inputs: - script: | - mkdir -p $HOME/.onnx - docker run --gpus all -e CC=/opt/rh/devtoolset-11/root/usr/bin/cc -e CXX=/opt/rh/devtoolset-11/root/usr/bin/c++ -e CFLAGS="-Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -fstack-clash-protection -fcf-protection -O3 -Wl,--strip-all" -e CXXFLAGS="-Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -fstack-clash-protection -fcf-protection -O3 -Wl,--strip-all" --rm \ - --volume /data/onnx:/data/onnx:ro \ - --volume $(Build.SourcesDirectory):/onnxruntime_src \ - --volume $(Build.BinariesDirectory):/build \ - --volume /data/models:/build/models:ro \ - --volume $HOME/.onnx:/home/onnxruntimedev/.onnx \ - -e BUILD_BUILDNUMBER \ - onnxruntimecuda118xtrt86build \ - $(PythonManylinuxDir)/bin/python3 /onnxruntime_src/tools/ci_build/build.py \ - --build_dir /build --cmake_generator Ninja \ - --config Release --update --build \ - --skip_submodule_sync \ - --parallel \ - --build_wheel \ - 
--enable_onnx_tests --use_tensorrt --cuda_version=11.8 --tensorrt_home=/usr --cuda_home=/usr/local/cuda-11.8 --cudnn_home=/usr/local/cuda-11.8 \ - ${{ parameters.gpu_build_py_parameters }} \ - --cmake_extra_defines CMAKE_CUDA_HOST_COMPILER=/opt/rh/devtoolset-11/root/usr/bin/cc 'CMAKE_CUDA_ARCHITECTURES=37;50;52;60;61;70;75;80' - workingDirectory: $(Build.SourcesDirectory) - - - task: CmdLine@2 - displayName: 'Running tests' - condition: and(succeeded(), eq(variables['PythonVersion'], '3.8')) - inputs: - script: | - set -e -x - rm -rf $(Build.BinariesDirectory)/Release/onnxruntime $(Build.BinariesDirectory)/Release/pybind11 - sudo rm -f /build /onnxruntime_src - sudo ln -s $(Build.SourcesDirectory) /onnxruntime_src - python3 -m pip uninstall -y onnxruntime onnxruntime-gpu onnxruntime-training onnxruntime-directml -qq - cp $(Build.SourcesDirectory)/tools/ci_build/github/linux/docker/scripts/manylinux/requirements.txt $(Build.BinariesDirectory)/requirements.txt - # Test ORT with the latest ONNX release. - sed -i "s/git+http:\/\/github\.com\/onnx\/onnx.*/onnx/" $(Build.BinariesDirectory)/requirements.txt - python3 -m pip install -r $(Build.BinariesDirectory)/requirements.txt - python3 -m pip install $(Build.BinariesDirectory)/Release/dist/*.whl - cd $(Build.BinariesDirectory)/Release - ls $(Build.BinariesDirectory)/models - rmdir $(Build.BinariesDirectory)/models - ln -sf /data/models $(Build.BinariesDirectory) - python3 /onnxruntime_src/tools/ci_build/build.py \ - --build_dir $(Build.BinariesDirectory) --cmake_generator Ninja \ - --config Release --test \ - --skip_submodule_sync \ - --parallel \ - --build_wheel \ - --enable_onnx_tests --use_tensorrt --cuda_version=11.8 --tensorrt_home=/usr --cuda_home=/usr/local/cuda-11.8 --cudnn_home=/usr/local/cuda-11.8 \ - ${{ parameters.gpu_build_py_parameters }} --ctest_path '' \ - --cmake_extra_defines CMAKE_CUDA_HOST_COMPILER=/opt/rh/devtoolset-11/root/usr/bin/cc 'CMAKE_CUDA_ARCHITECTURES=37;50;52;60;61;70;75;80' - - - task: CopyFiles@2 - displayName: 'Copy Python Wheel to: $(Build.ArtifactStagingDirectory)' - inputs: - SourceFolder: '$(Build.BinariesDirectory)/Release/dist' - Contents: '*.whl' - TargetFolder: '$(Build.ArtifactStagingDirectory)' - - - task: PublishBuildArtifacts@1 - displayName: 'Publish Artifact: ONNXRuntime python wheel' - inputs: - ArtifactName: onnxruntime_gpu - - - template: component-governance-component-detection-steps.yml - parameters: - condition: 'succeeded' - - - template: clean-agent-build-directory-step.yml - - - ${{ if eq(parameters.enable_windows_gpu, true) }}: - - job: Windows_py_GPU_Wheels - workspace: - clean: all - pool: 'onnxruntime-Win2022-GPU-A10' - timeoutInMinutes: 300 - variables: - - template: common-variables.yml - CUDA_VERSION: '11.8' - buildArch: x64 - EpBuildFlags: --use_tensorrt --tensorrt_home="$(Agent.TempDirectory)\${{ variables.win_trt_folder_cuda11 }}" --cuda_version=$(CUDA_VERSION) --cuda_home="C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v$(CUDA_VERSION)" --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=37;50;52;60;61;70;75;80" - EnvSetupScript: setup_env_gpu.bat - EP_NAME: gpu - VSGenerator: 'Visual Studio 17 2022' - strategy: - matrix: - ${{ each PythonVersion in parameters.python_version }}: - 'Python${{ PythonVersion }}': - PythonVersion: ${{ PythonVersion }} - steps: - - checkout: self - clean: true - submodules: recursive - - - task: UsePythonVersion@0 - inputs: - versionSpec: $(PythonVersion) - addToPath: true - architecture: 'x64' - - - task: BatchScript@1 - displayName: 'setup env' - 
inputs: - filename: '$(Build.SourcesDirectory)\tools\ci_build\github\windows\$(EnvSetupScript)' - modifyEnvironment: true - workingFolder: '$(Build.BinariesDirectory)' - - - task: PowerShell@2 - displayName: 'Install ONNX' - inputs: - filePath: '$(Build.SourcesDirectory)/tools/ci_build/github/windows/install_third_party_deps.ps1' - workingDirectory: '$(Build.BinariesDirectory)' - arguments: -cpu_arch x64 -install_prefix $(Build.BinariesDirectory)\RelWithDebInfo\installed -build_config RelWithDebInfo - - - task: PythonScript@0 - displayName: 'Generate cmake config' - inputs: - scriptPath: '$(Build.SourcesDirectory)\tools\ci_build\build.py' - arguments: > - --config RelWithDebInfo - --build_dir $(Build.BinariesDirectory) - --skip_submodule_sync - --cmake_generator "$(VSGenerator)" - --enable_pybind - --enable_onnx_tests - ${{ parameters.gpu_build_py_parameters }} - --parallel --update - $(EpBuildFlags) - workingDirectory: '$(Build.BinariesDirectory)' - - - task: VSBuild@1 - displayName: 'Build' - inputs: - solution: '$(Build.BinariesDirectory)\RelWithDebInfo\onnxruntime.sln' - platform: x64 - configuration: RelWithDebInfo - msbuildArchitecture: $(buildArch) - maximumCpuCount: true - logProjectEvents: true - workingFolder: '$(Build.BinariesDirectory)\RelWithDebInfo' - createLogFile: true - - # Esrp signing - - template: win-esrp-dll.yml - parameters: - FolderPath: '$(Build.BinariesDirectory)\RelWithDebInfo\RelWithDebInfo\onnxruntime\capi' - DisplayName: 'ESRP - Sign Native dlls' - DoEsrp: true - Pattern: '*.pyd,*.dll' - - - task: PythonScript@0 - displayName: 'Build wheel' - inputs: - scriptPath: '$(Build.SourcesDirectory)\setup.py' - arguments: 'bdist_wheel ${{ parameters.gpu_build_py_parameters }} --wheel_name_suffix=$(EP_NAME)' - workingDirectory: '$(Build.BinariesDirectory)\RelWithDebInfo\RelWithDebInfo' - - - task: CopyFiles@2 - displayName: 'Copy Python Wheel to: $(Build.ArtifactStagingDirectory)' - inputs: - SourceFolder: '$(Build.BinariesDirectory)\RelWithDebInfo\RelWithDebInfo\dist' - Contents: '*.whl' - TargetFolder: '$(Build.ArtifactStagingDirectory)' - - - task: PublishBuildArtifacts@1 - displayName: 'Publish Artifact: ONNXRuntime python wheel' - inputs: - ArtifactName: onnxruntime_gpu - - - script: | - 7z x *.whl - workingDirectory: '$(Build.ArtifactStagingDirectory)' - displayName: 'unzip the package' - - - task: CredScan@3 - displayName: 'Run CredScan' - inputs: - debugMode: false - continueOnError: true - - - task: BinSkim@4 - displayName: 'Run BinSkim' - inputs: - AnalyzeTargetGlob: '+:file|$(Build.ArtifactStagingDirectory)\**\*.dll;-:file|$(Build.ArtifactStagingDirectory)\**\DirectML.dll' - - - task: DeleteFiles@1 - displayName: 'Delete files from $(Build.BinariesDirectory)\RelWithDebInfo' - condition: and (succeeded(), eq(variables['PythonVersion'], '3.8')) - inputs: - SourceFolder: '$(Build.BinariesDirectory)\RelWithDebInfo' - Contents: | - **/*.obj - **/*.pdb - **/*.dll - - - powershell: | - python -m pip uninstall -y onnxruntime onnxruntime-gpu -qq - Get-ChildItem -Path $(Build.ArtifactStagingDirectory)/*.whl | foreach {pip --disable-pip-version-check install --upgrade $_.fullname tabulate} - Remove-Item -Recurse -Force onnxruntime - python onnx_backend_test_series.py - workingDirectory: '$(Build.BinariesDirectory)\RelWithDebInfo\RelWithDebInfo' - displayName: 'Run Python Tests' - - #Manually set msBuildCommandline so that we can also set CAExcludePath - - task: SDLNativeRules@3 - displayName: 'Run the PREfast SDL Native Rules for MSBuild' - condition: and (succeeded(), 
eq(variables['PythonVersion'], '3.8')) - inputs: - msBuildArchitecture: amd64 - setupCommandlines: 'python $(Build.SourcesDirectory)\tools\ci_build\build.py --config RelWithDebInfo --build_dir $(Build.BinariesDirectory) --skip_submodule_sync --cmake_generator "$(VSGenerator)" --enable_pybind --enable_onnx_tests ${{ parameters.gpu_build_py_parameters }} --parallel $(EpBuildFlags) --update --cmake_extra_defines onnxruntime_ENABLE_STATIC_ANALYSIS=ON' - ${{if eq(variables.VSGenerator, 'Visual Studio 16 2019')}}: - msBuildCommandline: '"C:\Program Files\Microsoft Visual Studio\2019\Enterprise\MSBuild\Current\Bin\amd64\msbuild.exe" "$(Build.BinariesDirectory)\RelWithDebInfo\onnxruntime.sln" /p:platform=x64 /p:configuration="RelWithDebInfo" /p:VisualStudioVersion="16.0" /m /p:PreferredToolArchitecture=x64' - ${{else}}: - msBuildCommandline: '"C:\Program Files\Microsoft Visual Studio\2022\Enterprise\MSBuild\Current\Bin\amd64\msbuild.exe" "$(Build.BinariesDirectory)\RelWithDebInfo\onnxruntime.sln" /p:platform=x64 /p:configuration="RelWithDebInfo" /p:VisualStudioVersion="17.0" /m /p:PreferredToolArchitecture=x64' - excludedPaths: '$(Build.BinariesDirectory)#$(Build.SourcesDirectory)\cmake#C:\program files (x86)' - - - task: TSAUpload@2 - displayName: 'TSA upload' - condition: and(and (succeeded(), eq(variables['PythonVersion'], '3.8')), eq(variables['Build.SourceBranch'], 'refs/heads/main')) - inputs: - GdnPublishTsaOnboard: false - GdnPublishTsaConfigFile: '$(Build.sourcesDirectory)\.gdn\.gdntsa' - continueOnError: true - - - template: component-governance-component-detection-steps.yml - parameters: - condition: 'succeeded' - diff --git a/tools/ci_build/github/azure-pipelines/templates/py-win-arm64-qnn.yml b/tools/ci_build/github/azure-pipelines/templates/py-win-arm64-qnn.yml index e07f0afa6109c..ff2ecb0d3c28f 100644 --- a/tools/ci_build/github/azure-pipelines/templates/py-win-arm64-qnn.yml +++ b/tools/ci_build/github/azure-pipelines/templates/py-win-arm64-qnn.yml @@ -7,7 +7,7 @@ parameters: - name: QNN_SDK displayName: QNN SDK Version type: string - default: 2.28.2.241116 + default: 2.30.0.250109 - name: ENV_SETUP_SCRIPT type: string @@ -94,6 +94,7 @@ jobs: --build_dir $(Build.BinariesDirectory) --skip_submodule_sync --cmake_generator "$(VSGenerator)" + --build_shared_lib --use_qnn --qnn_home $(QnnSDKRootDir) --enable_pybind diff --git a/tools/ci_build/github/azure-pipelines/templates/py-win-arm64ec-qnn.yml b/tools/ci_build/github/azure-pipelines/templates/py-win-arm64ec-qnn.yml index 8cc647c2464f3..f382156c03944 100644 --- a/tools/ci_build/github/azure-pipelines/templates/py-win-arm64ec-qnn.yml +++ b/tools/ci_build/github/azure-pipelines/templates/py-win-arm64ec-qnn.yml @@ -7,7 +7,7 @@ parameters: - name: QNN_SDK displayName: QNN SDK Version type: string - default: 2.28.2.241116 + default: 2.30.0.250109 - name: ENV_SETUP_SCRIPT type: string @@ -92,6 +92,7 @@ jobs: --build_dir $(Build.BinariesDirectory) --skip_submodule_sync --cmake_generator "$(VSGenerator)" + --build_shared_lib --use_qnn --qnn_home $(QnnSDKRootDir) --enable_pybind diff --git a/tools/ci_build/github/azure-pipelines/templates/py-win-x64-qnn.yml b/tools/ci_build/github/azure-pipelines/templates/py-win-x64-qnn.yml index 466fee92d0d5e..a5f2a481e6ba8 100644 --- a/tools/ci_build/github/azure-pipelines/templates/py-win-x64-qnn.yml +++ b/tools/ci_build/github/azure-pipelines/templates/py-win-x64-qnn.yml @@ -7,7 +7,7 @@ parameters: - name: QNN_SDK displayName: QNN SDK Version type: string - default: 2.28.2.241116 + default: 2.30.0.250109 
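The three py-win-*-qnn.yml templates in this stretch all receive the same change: the wheel build now passes --build_shared_lib alongside --use_qnn. Assembled from the argument lists shown in these hunks, the effective build.py invocation is roughly (remaining flags abbreviated):

  python tools\ci_build\build.py --build_dir $(Build.BinariesDirectory) ^
    --skip_submodule_sync --cmake_generator "$(VSGenerator)" ^
    --build_shared_lib --use_qnn --qnn_home $(QnnSDKRootDir) --enable_pybind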
- name: ENV_SETUP_SCRIPT type: string @@ -92,6 +92,7 @@ jobs: --build_dir $(Build.BinariesDirectory) --skip_submodule_sync --cmake_generator "$(VSGenerator)" + --build_shared_lib --use_qnn --qnn_home $(QnnSDKRootDir) --enable_pybind diff --git a/tools/ci_build/github/azure-pipelines/templates/qnn-ep-win.yml b/tools/ci_build/github/azure-pipelines/templates/qnn-ep-win.yml index aa0b6bf6d391e..5a74998ca4bc8 100644 --- a/tools/ci_build/github/azure-pipelines/templates/qnn-ep-win.yml +++ b/tools/ci_build/github/azure-pipelines/templates/qnn-ep-win.yml @@ -1,5 +1,5 @@ parameters: - QnnSdk: '2.28.2.241116' + QnnSdk: '2.30.0.250109' build_config: 'RelWithDebInfo' IsReleaseBuild: false DoEsrp: false @@ -93,12 +93,18 @@ stages: workingFolder: '$(Build.BinariesDirectory)\${{ parameters.build_config }}' createLogFile: true + - task: CmdLine@2 + displayName: 'Print contents of binaries directory' + inputs: + script: | + dir $(Build.BinariesDirectory)\${{ parameters.build_config }}\${{ parameters.build_config }} + - template: win-esrp-dll.yml parameters: FolderPath: '$(Build.BinariesDirectory)\${{ parameters.build_config }}\${{ parameters.build_config }}' DisplayName: 'ESRP - Sign dlls' DoEsrp: ${{ parameters.DoEsrp }} - Pattern: 'onnxruntime.dll' + Pattern: 'onnxruntime*.dll' - task: MSBuild@1 displayName: 'Restore NuGet Packages and create project.assets.json' diff --git a/tools/ci_build/github/azure-pipelines/templates/react-native-ci.yml b/tools/ci_build/github/azure-pipelines/templates/react-native-ci.yml index f4d00c274030c..b46fae79899e2 100644 --- a/tools/ci_build/github/azure-pipelines/templates/react-native-ci.yml +++ b/tools/ci_build/github/azure-pipelines/templates/react-native-ci.yml @@ -29,11 +29,10 @@ parameters: type: boolean stages: -- stage: Build_Packages - displayName: Build Packages +- stage: Build_Android_Packages + displayName: Build_Android_Packages dependsOn: '${{parameters.InitialStageDependsOn}}' jobs: - - template: android-java-api-aar.yml parameters: buildConfig: '${{parameters.BuildConfig}}' @@ -43,8 +42,21 @@ stages: enable_code_sign: '${{parameters.enable_code_sign}}' pool_name: '${{parameters.PoolName}}' packageName: 'onnxruntime-android' +- stage: ReactNative_CI_Android + displayName: ReactNative_CI_Android + dependsOn: Build_Android_Packages + jobs: + - template: ../stages/jobs/react-natvie-andriod-e2e-test-job.yml + parameters: + PackageName: '${{parameters.PackageName}}' + ArtifactName: 'onnxruntime-android-full-aar' + NpmPackagingMode: '${{parameters.NpmPackagingMode}}' - - job: Build_Ios_Pod_For_React_Native +- stage: ReactNative_CI_iOS + displayName: ReactNative_CI_iOS + dependsOn: '${{parameters.InitialStageDependsOn}}' + jobs: + - job: ReactNative_CI_iOS pool: vmImage: 'macOS-13' @@ -67,7 +79,7 @@ stages: - script: | pip install -r tools/ci_build/github/apple/ios_packaging/requirements.txt displayName: "Install Python requirements" - + # Build the iOS package - template: mac-build-step-with-cache.yml parameters: WithCache: true @@ -75,58 +87,25 @@ stages: AdditionalKey: react_${{parameters.BuildConfig}} CacheDir: $(ORT_CACHE_DIR) BuildStep: - - script: | - set -e -x - python $(Build.SourcesDirectory)/tools/ci_build/github/apple/build_and_assemble_apple_pods.py \ - --build-dir "$(Build.BinariesDirectory)/ios_framework_full" \ - --staging-dir "$(Build.BinariesDirectory)/staging" \ - --variant Full \ - --build-settings-file $(Build.SourcesDirectory)/tools/ci_build/github/js/react_native_e2e_full_ios_framework_build_settings.json - displayName: Build iOS package 
and assemble pods - env: - CC: clang - CXX: clang++ - CCACHE_CPP2: 1 - CCACHE_DEPEND: 1 - CCACHE_SLOPPINESS: modules - CCACHE_DIR: $(ORT_CACHE_DIR) - - - task: PublishPipelineArtifact@1 - inputs: - targetPath: '$(Build.BinariesDirectory)/staging' - artifact: 'onnxruntime-ios-full-pod' - -- stage: ReactNative_CI - displayName: React Native CI - dependsOn: - - Build_Packages - jobs: - - job: ReactNative_CI - pool: - vmImage: 'macOS-13' - variables: - runCodesignValidationInjection: false - timeoutInMinutes: 90 - steps: - - template: use-xcode-version.yml - - task: UsePythonVersion@0 - displayName: Use python 3.12 - inputs: - versionSpec: "3.12" - addToPath: true - architecture: "x64" - - - task: JavaToolInstaller@0 - displayName: Use jdk 17 - inputs: - versionSpec: '17' - jdkArchitectureOption: 'x64' - jdkSourceOption: 'PreInstalled' - + - script: | + set -e -x + python $(Build.SourcesDirectory)/tools/ci_build/github/apple/build_and_assemble_apple_pods.py \ + --build-dir "$(Build.BinariesDirectory)/ios_framework_full" \ + --staging-dir "$(Build.BinariesDirectory)/ios-full-pod" \ + --variant Full \ + --build-settings-file $(Build.SourcesDirectory)/tools/ci_build/github/js/react_native_e2e_full_ios_framework_build_settings.json + displayName: Build iOS package and assemble pods + env: + CC: clang + CXX: clang++ + CCACHE_CPP2: 1 + CCACHE_DEPEND: 1 + CCACHE_SLOPPINESS: modules + CCACHE_DIR: $(ORT_CACHE_DIR) + # Test the iOS package - task: NodeTool@0 inputs: versionSpec: '20.x' - - script: brew install coreutils ninja npm yarn displayName: Install coreutils, ninja, npm, and yarn @@ -143,27 +122,6 @@ stages: brew install applesimutils displayName: Install applesimutils tools required by detox ios - - task: DownloadPipelineArtifact@2 - inputs: - buildType: 'current' - artifactName: 'onnxruntime-android-full-aar' - targetPath: '$(Build.BinariesDirectory)/android-full-aar' - displayName: Download Android AAR artifacts - - - task: CopyFiles@2 - inputs: - sourceFolder: $(Build.BinariesDirectory)/android-full-aar - contents: onnxruntime-android-*.aar - targetFolder: $(Build.SourcesDirectory)/js/react_native/android/libs - displayName: Copy Android package to React Native directory - - - task: DownloadPipelineArtifact@2 - inputs: - buildType: 'current' - artifactName: 'onnxruntime-ios-full-pod' - targetPath: '$(Build.BinariesDirectory)/ios-full-pod' - displayName: Download iOS pod artifacts - - script: | npm ci workingDirectory: '$(Build.SourcesDirectory)/js' @@ -190,6 +148,7 @@ stages: displayName: Pack NPM packages - script: | + set -x -e mv $(Build.SourcesDirectory)/js/common/onnxruntime-common*.tgz onnxruntime-common.tgz yarn add --no-lockfile file:./onnxruntime-common.tgz mv $(Build.SourcesDirectory)/js/react_native/onnxruntime-react-native*.tgz onnxruntime-react-native.tgz @@ -198,29 +157,10 @@ stages: workingDirectory: '$(Build.SourcesDirectory)/js/react_native/e2e' displayName: Bootstrap Android and iOS e2e tests -# TODO: remove this once we upgrade the RN to post 0.70.15 or 0.73.3+ versions this is just a temporary workaround - script: | - if [[ "$OSTYPE" == "darwin"* ]]; then - sed -i .bak "/boostorg.jfrog.io/c\\ - spec.source = { :http => 'https://archives.boost.io/release/1.76.0/source/boost_1_76_0.tar.bz2', " boost.podspec - else - sed -i .bak "/boostorg.jfrog.io/c\spec.source = { :http => 'https://archives.boost.io/release/1.76.0/source/boost_1_76_0.tar.bz2', " boost.podspec - fi - rm -f boost.podspec.bak - workingDirectory: 
'$(Build.SourcesDirectory)/js/react_native/node_modules/react-native/third-party-podspecs' - displayName: Path the boost 1.76.0 source URL in boost.podspec js/react_native/node_modules/react-native/third-party-podspecs/boost.podspec - -# TODO: remove this once we upgrade the RN to post 0.70.15 or 0.73.3+ versions this is just a temporary workaround - - script: | - if [[ "$OSTYPE" == "darwin"* ]]; then - sed -i .bak "/boostorg.jfrog.io/c\\ - spec.source = { :http => 'https://archives.boost.io/release/1.76.0/source/boost_1_76_0.tar.bz2', " boost.podspec - else - sed -i .bak "/boostorg.jfrog.io/c\spec.source = { :http => 'https://archives.boost.io/release/1.76.0/source/boost_1_76_0.tar.bz2', " boost.podspec - fi - rm -f boost.podspec.bak - workingDirectory: '$(Build.SourcesDirectory)/js/react_native/e2e/node_modules/react-native/third-party-podspecs' - displayName: Path the boost 1.76.0 source URL in boost.podspec in js/react_native/e2e/node_modules/react-native/third-party-podspecs/boost.podspec + yarn add --dev jest-junit + workingDirectory: '$(Build.SourcesDirectory)/js/react_native/e2e' + displayName: install jest junit reporter js/react_native/e2e - script: | ORT_C_LOCAL_POD_PATH=$(Build.BinariesDirectory)/ios-full-pod/onnxruntime-c \ @@ -234,79 +174,10 @@ stages: workingDirectory: '$(Build.SourcesDirectory)/js/react_native/e2e/ios' displayName: Pod install for onnxruntime react native ios e2e tests - - script: | - yarn add --dev jest-junit - workingDirectory: '$(Build.SourcesDirectory)/js/react_native/e2e' - displayName: install jest junit reporter js/react_native/e2e - - - script: | - keytool -genkey -v -keystore debug.keystore -alias androiddebugkey -storepass android \ - -keypass android -keyalg RSA -keysize 2048 -validity 999999 -dname "CN=Android Debug,O=Android,C=US" - workingDirectory: '$(Build.SourcesDirectory)/js/react_native/e2e/android' - displayName: Generate a debug keystore - - - task: CopyFiles@2 - inputs: - sourceFolder: $(Build.BinariesDirectory)/android-full-aar - contents: onnxruntime-*.aar - targetFolder: $(Build.SourcesDirectory)/js/react_native/e2e/android/app/libs - displayName: Copy Android package to Android e2e test directory - - - script: | - detox build --configuration android.emu.release - workingDirectory: '$(Build.SourcesDirectory)/js/react_native/e2e' - displayName: Build React Native Detox Android e2e Tests - - script: | detox build --configuration ios.sim.release workingDirectory: '$(Build.SourcesDirectory)/js/react_native/e2e' displayName: Build React Native Detox iOS e2e Tests - - # - # Unit tests and E2E tests with Android emulator - # - - template: use-android-emulator.yml - parameters: - create: true - start: true - - - template: android-dump-logs-from-steps.yml - parameters: - steps: - - task: Gradle@3 - inputs: - gradleWrapperFile: '$(Build.SourcesDirectory)/js/react_native/android/gradlew' - workingDirectory: '$(Build.SourcesDirectory)/js/react_native/android' - options: '--stacktrace' - tasks: 'connectedDebugAndroidTest' - publishJUnitResults: true - testResultsFiles: '**/TEST-*.xml' - testRunTitle: 'React Native Android Instrumented Test results' - sonarQubeRunAnalysis: false - spotBugsAnalysis: false - displayName: Run React Native Android Instrumented Tests - - - script: | - JEST_JUNIT_OUTPUT_FILE=$(Build.SourcesDirectory)/js/react_native/e2e/android-test-results.xml \ - detox test --record-logs all \ - --configuration android.emu.release \ - --loglevel trace \ - --take-screenshots failing - workingDirectory: 
'$(Build.SourcesDirectory)/js/react_native/e2e' - displayName: Run React Native Detox Android e2e Tests - - - template: use-android-emulator.yml - parameters: - stop: true - - - task: PublishTestResults@2 - inputs: - testResultsFiles: '$(Build.SourcesDirectory)/js/react_native/e2e/android-test-results.xml' - failTaskOnFailedTests: true - testRunTitle: 'React Native Detox Android e2e Test Results' - condition: succeededOrFailed() - displayName: Publish React Native Detox Android e2e Test Results - # # Unit tests and E2E tests with iOS simulator # @@ -372,44 +243,14 @@ stages: condition: succeededOrFailed() displayName: Publish React Native Detox iOS e2e Test Results - - script: | - git restore . - workingDirectory: '$(Build.SourcesDirectory)/js' - displayName: Restore git changes - - - task: PowerShell@2 - inputs: - filePath: '$(Build.SourcesDirectory)/tools/ci_build/github/js/pack-npm-packages.ps1' - arguments: '"${{parameters.NpmPackagingMode}}" $(Build.SourcesDirectory) react_native' - workingDirectory: '$(Build.SourcesDirectory)' - errorActionPreference: stop - displayName: Pack NPM packages - - - task: CopyFiles@2 - inputs: - sourceFolder: $(Build.SourcesDirectory)/js/common - contents: onnxruntime-common*.tgz - targetFolder: $(Build.ArtifactStagingDirectory) - displayName: 'Create Artifacts onnxruntime-common' - - - task: CopyFiles@2 - inputs: - sourceFolder: $(Build.SourcesDirectory)/js/react_native - contents: onnxruntime-react-native*.tgz - targetFolder: $(Build.ArtifactStagingDirectory) - displayName: Create Artifacts onnxruntime-react-native - - task: PublishPipelineArtifact@1 inputs: - artifact: e2e_test_logs_$(Build.BuildId)_$(Build.BuildNumber)_$(System.JobAttempt) + artifact: ios_e2e_test_logs_$(Build.BuildId)_$(Build.BuildNumber)_$(System.JobAttempt) targetPath: '$(Build.SourcesDirectory)/js/react_native/e2e/artifacts' condition: succeededOrFailed() displayName: Publish React Native Detox E2E test logs - - task: PublishPipelineArtifact@0 - inputs: - artifactName: '${{parameters.PackageName}}' - targetPath: '$(Build.ArtifactStagingDirectory)' - displayName: Publish Pipeline Artifact - - template: explicitly-defined-final-tasks.yml + + + diff --git a/tools/ci_build/github/azure-pipelines/templates/web-ci.yml b/tools/ci_build/github/azure-pipelines/templates/web-ci.yml index 3f44bcb22279f..d059921bb74c9 100644 --- a/tools/ci_build/github/azure-pipelines/templates/web-ci.yml +++ b/tools/ci_build/github/azure-pipelines/templates/web-ci.yml @@ -23,6 +23,10 @@ parameters: displayName: 'Build JSEP' type: boolean default: true +- name: BuildWebGPU + displayName: 'Build WebGPU (EP)' + type: boolean + default: true - name: WASMTemplate type: string @@ -98,6 +102,7 @@ stages: ExtraBuildArgs: '--enable_wasm_profiling ${{ parameters.ExtraBuildArgs }}' PoolName: ${{ parameters.PoolName }} BuildJsep: ${{ parameters.BuildJsep }} + BuildWebGPU: ${{ parameters.BuildWebGPU }} WithCache: ${{ parameters.WithCache }} - stage: Build_web_Debug @@ -128,6 +133,7 @@ stages: ExtraBuildArgs: '--target onnxruntime_webassembly --skip_tests --enable_wasm_api_exception_catching --disable_rtti ${{ parameters.ExtraBuildArgs }}' PoolName: ${{ parameters.PoolName }} BuildJsep: ${{ parameters.BuildJsep }} + BuildWebGPU: false WithCache: ${{ parameters.WithCache }} - ${{ if eq(parameters.BuildStaticLib, 'true') }}: diff --git a/tools/ci_build/github/azure-pipelines/win-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/win-ci-pipeline.yml index 94c2d35a563b6..d96f1cb68c388 100644 --- 
a/tools/ci_build/github/azure-pipelines/win-ci-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/win-ci-pipeline.yml @@ -177,6 +177,25 @@ stages: WITH_CACHE: false MachinePool: 'onnxruntime-Win-CPU-2022' +- stage: x64_release_ep_generic_interface + dependsOn: [] + jobs: + - template: templates/jobs/win-ci-vs-2022-job.yml + parameters: + BuildConfig: 'RelWithDebInfo' + buildArch: x64 + additionalBuildFlags: --enable_generic_interface + msbuildPlatform: x64 + isX86: false + job_name_suffix: x64_release_ep_generic_interface + RunOnnxRuntimeTests: false # --enable_generic_interface does not build tests + EnablePython: false + isTraining: false + ORT_EP_NAME: CPU + GenerateDocumentation: false + WITH_CACHE: false + MachinePool: 'onnxruntime-Win-CPU-2022' + - stage: x86_release dependsOn: [] jobs: diff --git a/tools/ci_build/github/azure-pipelines/win-qnn-arm64-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/win-qnn-arm64-ci-pipeline.yml index 5c013fae6be0b..787c3ffe23bd9 100644 --- a/tools/ci_build/github/azure-pipelines/win-qnn-arm64-ci-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/win-qnn-arm64-ci-pipeline.yml @@ -33,10 +33,10 @@ parameters: - name: QnnSdk displayName: QNN SDK version type: string - default: 2.28.2.241116 + default: 2.30.0.250109 jobs: -- job: 'build' +- job: 'BUILD_QNN_EP' pool: 'onnxruntime-qnn-windows-vs-2022-arm64' variables: DOTNET_SKIP_FIRST_TIME_EXPERIENCE: true @@ -46,6 +46,12 @@ jobs: timeoutInMinutes: 240 workspace: clean: all + strategy: + matrix: + SHARED_LIB: + QnnLibKind: 'shared_lib' + STATIC_LIB: + QnnLibKind: 'static_lib' steps: - script: | @@ -79,7 +85,8 @@ jobs: --config $(BuildConfig) --build_dir $(Build.BinariesDirectory) --cmake_generator "Visual Studio 17 2022" - --use_qnn + --build_shared_lib + --use_qnn $(QnnLibKind) --qnn_home $(QnnSDKRootDir) --update --build --parallel @@ -88,7 +95,8 @@ jobs: --config $(BuildConfig) ^ --build_dir $(Build.BinariesDirectory) ^ --cmake_generator "Visual Studio 17 2022" ^ - --use_qnn ^ + --build_shared_lib ^ + --use_qnn $(QnnLibKind) ^ --qnn_home $(QnnSDKRootDir) ^ --test --enable_onnx_tests displayName: 'Run unit tests' @@ -121,7 +129,7 @@ jobs: TargetFolder: '$(Build.ArtifactStagingDirectory)' CleanTargetFolder: true OverWrite: true - condition: and(succeeded(), ne(variables['Build.Reason'], 'PullRequest')) + condition: and(succeeded(), and(ne(variables['Build.Reason'], 'PullRequest'), eq(variables['QnnLibKind'], 'shared_lib'))) - task: PublishBuildArtifacts@1 displayName: 'Publish Artifact' @@ -129,4 +137,4 @@ jobs: PathtoPublish: '$(Build.ArtifactStagingDirectory)' ArtifactName: 'internal_release' publishLocation: 'Container' - condition: and(succeeded(), ne(variables['Build.Reason'], 'PullRequest')) + condition: and(succeeded(), and(ne(variables['Build.Reason'], 'PullRequest'), eq(variables['QnnLibKind'], 'shared_lib'))) diff --git a/tools/ci_build/github/azure-pipelines/win-qnn-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/win-qnn-ci-pipeline.yml index 53700c58c7e7d..28fbe4a1096b2 100644 --- a/tools/ci_build/github/azure-pipelines/win-qnn-ci-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/win-qnn-ci-pipeline.yml @@ -33,10 +33,10 @@ parameters: - name: QnnSdk displayName: QNN SDK version type: string - default: 2.28.2.241116 + default: 2.30.0.250109 jobs: -- job: 'build' +- job: 'BUILD_QNN_EP' pool: 'Onnxruntime-QNNEP-Windows-2022-CPU' variables: MsbuildArguments: '-detailedsummary -maxcpucount -consoleloggerparameters:PerformanceSummary' @@ -50,6 +50,12 @@ jobs: 
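The strategy/matrix block added to win-qnn-arm64-ci-pipeline.yml above (and repeated in win-qnn-ci-pipeline.yml below) fans the single BUILD_QNN_EP job out into SHARED_LIB and STATIC_LIB variants, with $(QnnLibKind) supplying the new optional value of --use_qnn. Conceptually the matrix amounts to running the build once per library kind, roughly as in this sketch (illustrative; the pipelines pass $(BuildConfig) and $(QnnSDKRootDir) where placeholders appear here):

```sh
# Each matrix entry becomes its own job with QnnLibKind set accordingly.
for qnn_lib_kind in shared_lib static_lib; do
  python tools/ci_build/build.py \
    --config RelWithDebInfo \
    --build_dir build \
    --cmake_generator "Visual Studio 17 2022" \
    --build_shared_lib \
    --use_qnn "$qnn_lib_kind" \
    --qnn_home "$QNN_SDK_ROOT" \
    --update --build --parallel
done
```

Note that the artifact copy and publish steps at the end of the arm64 pipeline are now additionally gated on eq(variables['QnnLibKind'], 'shared_lib'), so only the shared-library variant of the matrix uploads a release artifact.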
timeoutInMinutes: 120 workspace: clean: all + strategy: + matrix: + SHARED_LIB: + QnnLibKind: 'shared_lib' + STATIC_LIB: + QnnLibKind: 'static_lib' steps: - task: UsePythonVersion@0 @@ -72,7 +78,8 @@ jobs: --build_dir $(Build.BinariesDirectory) --cmake_generator "Visual Studio 17 2022" --build_java - --use_qnn + --build_shared_lib + --use_qnn $(QnnLibKind) --qnn_home $(QnnSDKRootDir) --use_binskim_compliant_compile_flags --update --parallel @@ -87,7 +94,8 @@ jobs: --build_dir $(Build.BinariesDirectory) ^ --cmake_generator "Visual Studio 17 2022" ^ --build_java ^ - --use_qnn ^ + --build_shared_lib ^ + --use_qnn $(QnnLibKind) ^ --qnn_home $(QnnSDKRootDir) ^ --use_binskim_compliant_compile_flags ^ --test --enable_onnx_tests diff --git a/tools/ci_build/github/js/react_native_e2e_full_aar_build_settings.json b/tools/ci_build/github/js/react_native_e2e_full_aar_build_settings.json index 3e755ab5aa9db..c93c65f53339f 100644 --- a/tools/ci_build/github/js/react_native_e2e_full_aar_build_settings.json +++ b/tools/ci_build/github/js/react_native_e2e_full_aar_build_settings.json @@ -2,8 +2,8 @@ "build_abis": [ "x86_64" ], - "android_min_sdk_version": 21, - "android_target_sdk_version": 24, + "android_min_sdk_version": 24, + "android_target_sdk_version": 34, "build_params": [ "--android", "--parallel", diff --git a/tools/ci_build/github/linux/build_cuda_c_api_package.sh b/tools/ci_build/github/linux/build_cuda_c_api_package.sh index 9922fc396b3d5..129127c9497db 100755 --- a/tools/ci_build/github/linux/build_cuda_c_api_package.sh +++ b/tools/ci_build/github/linux/build_cuda_c_api_package.sh @@ -2,4 +2,4 @@ set -e -x docker run --rm --volume \ $BUILD_SOURCESDIRECTORY:/onnxruntime_src --volume $BUILD_BINARIESDIRECTORY:/build -e NIGHTLY_BUILD onnxruntimecuda${CUDA_VERSION_MAJOR}build \ -/bin/bash -c "/usr/bin/python3.12 /onnxruntime_src/tools/ci_build/build.py --enable_lto --build_java --build_nodejs --build_dir /build --config Release --skip_submodule_sync --parallel --use_binskim_compliant_compile_flags --build_shared_lib --use_cuda --cuda_version=$CUDA_VERSION --cuda_home=/usr/local/cuda-$CUDA_VERSION --cudnn_home=/usr/local/cuda-$CUDA_VERSION --skip_tests --cmake_extra_defines 'CMAKE_CUDA_ARCHITECTURES=60;61;70;75;80' && cd /build/Release && make install DESTDIR=/build/installed" +/bin/bash -c "/usr/bin/python3 /onnxruntime_src/tools/ci_build/build.py --enable_lto --build_java --build_nodejs --build_dir /build --config Release --skip_submodule_sync --parallel --use_binskim_compliant_compile_flags --build_shared_lib --use_cuda --cuda_version=$CUDA_VERSION --cuda_home=/usr/local/cuda-$CUDA_VERSION --cudnn_home=/usr/local/cuda-$CUDA_VERSION --skip_tests --cmake_extra_defines 'CMAKE_CUDA_ARCHITECTURES=75;80;90' && cd /build/Release && make install DESTDIR=/build/installed" diff --git a/tools/ci_build/github/linux/build_linux_python_package.sh b/tools/ci_build/github/linux/build_linux_python_package.sh index 0dd435f8dbf50..b5999da997589 100755 --- a/tools/ci_build/github/linux/build_linux_python_package.sh +++ b/tools/ci_build/github/linux/build_linux_python_package.sh @@ -70,12 +70,12 @@ fi if [ "$BUILD_DEVICE" == "GPU" ]; then SHORT_CUDA_VERSION=$(echo $CUDA_VERSION | sed 's/\([[:digit:]]\+\.[[:digit:]]\+\)\.[[:digit:]]\+/\1/') #Enable CUDA and TRT EPs. 
- BUILD_ARGS+=("--use_cuda" "--use_tensorrt" "--cuda_version=$SHORT_CUDA_VERSION" "--tensorrt_home=/usr" "--cuda_home=/usr/local/cuda-$SHORT_CUDA_VERSION" "--cudnn_home=/usr/local/cuda-$SHORT_CUDA_VERSION" "--cmake_extra_defines" "CMAKE_CUDA_ARCHITECTURES=52;60;61;70;75;80") + BUILD_ARGS+=("--use_cuda" "--use_tensorrt" "--cuda_version=$SHORT_CUDA_VERSION" "--tensorrt_home=/usr" "--cuda_home=/usr/local/cuda-$SHORT_CUDA_VERSION" "--cudnn_home=/usr/local/cuda-$SHORT_CUDA_VERSION" "--cmake_extra_defines" "CMAKE_CUDA_ARCHITECTURES=75;80;90") fi if [ "$BUILD_DEVICE" == "NPU" ]; then #Enable QNN EP - BUILD_ARGS+=("--use_qnn" "--qnn_home=/qnn_sdk") + BUILD_ARGS+=("--build_shared_lib" "--use_qnn" "--qnn_home=/qnn_sdk") fi export ONNX_ML=1 diff --git a/tools/ci_build/github/linux/build_tensorrt_c_api_package.sh b/tools/ci_build/github/linux/build_tensorrt_c_api_package.sh index 7f18e2f849d27..58ea3054afdda 100755 --- a/tools/ci_build/github/linux/build_tensorrt_c_api_package.sh +++ b/tools/ci_build/github/linux/build_tensorrt_c_api_package.sh @@ -3,4 +3,4 @@ set -e -x mkdir -p $HOME/.onnx docker run --rm --volume /data/onnx:/data/onnx:ro --volume $BUILD_SOURCESDIRECTORY:/onnxruntime_src --volume $BUILD_BINARIESDIRECTORY:/build \ --volume /data/models:/build/models:ro --volume $HOME/.onnx:/home/onnxruntimedev/.onnx -e NIGHTLY_BUILD onnxruntimecuda${CUDA_VERSION_MAJOR}xtrt86build \ -/bin/bash -c "/usr/bin/python3.12 /onnxruntime_src/tools/ci_build/build.py --build_dir /build --config Release --skip_tests --skip_submodule_sync --parallel --use_binskim_compliant_compile_flags --build_shared_lib --build_java --build_nodejs --use_tensorrt --cuda_version=$CUDA_VERSION --cuda_home=/usr/local/cuda-$CUDA_VERSION --cudnn_home=/usr --tensorrt_home=/usr --cmake_extra_defines 'CMAKE_CUDA_ARCHITECTURES=60;61;70;75;80' && cd /build/Release && make install DESTDIR=/build/installed" +/bin/bash -c "/usr/bin/python3 /onnxruntime_src/tools/ci_build/build.py --build_dir /build --config Release --skip_tests --skip_submodule_sync --parallel --use_binskim_compliant_compile_flags --build_shared_lib --build_java --build_nodejs --use_tensorrt --cuda_version=$CUDA_VERSION --cuda_home=/usr/local/cuda-$CUDA_VERSION --cudnn_home=/usr --tensorrt_home=/usr --cmake_extra_defines 'CMAKE_CUDA_ARCHITECTURES=75;80;90' && cd /build/Release && make install DESTDIR=/build/installed" diff --git a/tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_cpu b/tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_cpu index ee62a7da67b30..02938f015ec57 100644 --- a/tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_cpu +++ b/tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_cpu @@ -1,4 +1,4 @@ -FROM onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cpu_x64_ubi8_gcc14:20250108.1 +FROM onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cpu_x64_ubi8_gcc14:20250124.1 ENV JAVA_HOME=/usr/lib/jvm/msopenjdk-17 diff --git a/tools/ci_build/github/linux/docker/inference/aarch64/default/cpu/Dockerfile b/tools/ci_build/github/linux/docker/inference/aarch64/default/cpu/Dockerfile index fc2eb787b2b46..f9d84e3b0e130 100644 --- a/tools/ci_build/github/linux/docker/inference/aarch64/default/cpu/Dockerfile +++ b/tools/ci_build/github/linux/docker/inference/aarch64/default/cpu/Dockerfile @@ -2,7 +2,7 @@ # Licensed under the MIT License. 
# This file is used by Zip-Nuget Packaging NoContribOps Pipeline,Zip-Nuget-Java Packaging Pipeline -FROM onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cpu_aarch64_ubi8_gcc14_dotnet:20250108.1 +FROM onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cpu_aarch64_ubi8_gcc14_dotnet:20250124.1 ENV LANG=en_US.UTF-8 ENV LC_ALL=en_US.UTF-8 diff --git a/tools/ci_build/github/linux/docker/inference/aarch64/python/cpu/Dockerfile b/tools/ci_build/github/linux/docker/inference/aarch64/python/cpu/Dockerfile index e0bca907998bb..20b9a6c224120 100644 --- a/tools/ci_build/github/linux/docker/inference/aarch64/python/cpu/Dockerfile +++ b/tools/ci_build/github/linux/docker/inference/aarch64/python/cpu/Dockerfile @@ -1,4 +1,4 @@ -FROM onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cpu_aarch64_ubi8_gcc14:20250108.1 +FROM onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cpu_aarch64_ubi8_gcc14:20250124.1 ADD scripts /tmp/scripts RUN cd /tmp/scripts && /tmp/scripts/install_centos.sh && /tmp/scripts/install_deps.sh && rm -rf /tmp/scripts diff --git a/tools/ci_build/github/linux/docker/inference/aarch64/python/cpu/scripts/requirements.txt b/tools/ci_build/github/linux/docker/inference/aarch64/python/cpu/scripts/requirements.txt index a0c9a4326aec3..f5299584da8a2 100644 --- a/tools/ci_build/github/linux/docker/inference/aarch64/python/cpu/scripts/requirements.txt +++ b/tools/ci_build/github/linux/docker/inference/aarch64/python/cpu/scripts/requirements.txt @@ -4,7 +4,7 @@ mypy pytest setuptools>=68.2.2 wheel -onnx==1.17.0 +onnx==1.17.0 ; python_version < '3.13' protobuf==4.21.12 sympy==1.12 flatbuffers diff --git a/tools/ci_build/github/linux/docker/inference/x86_64/default/cpu/Dockerfile b/tools/ci_build/github/linux/docker/inference/x86_64/default/cpu/Dockerfile index 55442b51eb5db..d94e7562f19d4 100644 --- a/tools/ci_build/github/linux/docker/inference/x86_64/default/cpu/Dockerfile +++ b/tools/ci_build/github/linux/docker/inference/x86_64/default/cpu/Dockerfile @@ -2,7 +2,7 @@ # Licensed under the MIT License. # This file is used by Zip-Nuget Packaging NoContribOps Pipeline,Zip-Nuget-Java Packaging Pipeline -FROM onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cpu_x64_ubi8_gcc14_dotnet:20250108.1 +FROM onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cpu_x64_ubi8_gcc14_dotnet:20250124.1 ENV LANG=en_US.UTF-8 ENV LC_ALL=en_US.UTF-8 diff --git a/tools/ci_build/github/linux/docker/inference/x86_64/default/cuda11/Dockerfile b/tools/ci_build/github/linux/docker/inference/x86_64/default/cuda11/Dockerfile index 369eacee4740e..24287fd34d3ea 100644 --- a/tools/ci_build/github/linux/docker/inference/x86_64/default/cuda11/Dockerfile +++ b/tools/ci_build/github/linux/docker/inference/x86_64/default/cuda11/Dockerfile @@ -2,7 +2,7 @@ # Licensed under the MIT License. 
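The requirements.txt edits scattered through this area (one just above, several in the hunks below) all apply the same PEP 508 environment marker: onnx==1.17.0 ; python_version < '3.13'. pip evaluates the marker per interpreter, so Python 3.12 environments still get the pinned onnx while Python 3.13 environments skip the requirement entirely instead of failing to resolve it (presumably because no compatible onnx 1.17.0 wheel is published for 3.13). The marker behaves the same on the command line as in a requirements file:

```sh
# On Python < 3.13 this installs onnx 1.17.0; on 3.13+ pip reports that the
# markers don't match the environment and skips the requirement.
pip install "onnx==1.17.0 ; python_version < '3.13'"
```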
# This file is used by Zip-Nuget Packaging NoContribOps Pipeline,Zip-Nuget-Java Packaging Pipeline -FROM onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda11_x64_almalinux8_gcc11_dotnet:20250108.1 +FROM onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda11_x64_almalinux8_gcc11_dotnet:20250124.1 ARG TRT_VERSION #Install TensorRT only if TRT_VERSION is not empty diff --git a/tools/ci_build/github/linux/docker/inference/x86_64/default/cuda12/Dockerfile b/tools/ci_build/github/linux/docker/inference/x86_64/default/cuda12/Dockerfile index f27fd814c7fa7..764a79135d7a3 100644 --- a/tools/ci_build/github/linux/docker/inference/x86_64/default/cuda12/Dockerfile +++ b/tools/ci_build/github/linux/docker/inference/x86_64/default/cuda12/Dockerfile @@ -2,7 +2,7 @@ # Licensed under the MIT License. # This file is used by Zip-Nuget Packaging NoContribOps Pipeline,Zip-Nuget-Java Packaging Pipeline -FROM onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda12_x64_ubi8_gcc12_dotnet:20250108.1 +FROM onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda12_x64_ubi8_gcc12_dotnet:20250124.1 ARG TRT_VERSION #Install TensorRT only if TRT_VERSION is not empty diff --git a/tools/ci_build/github/linux/docker/inference/x86_64/python/cpu/Dockerfile b/tools/ci_build/github/linux/docker/inference/x86_64/python/cpu/Dockerfile index 4baa6eb0ec214..7590d5dd18347 100644 --- a/tools/ci_build/github/linux/docker/inference/x86_64/python/cpu/Dockerfile +++ b/tools/ci_build/github/linux/docker/inference/x86_64/python/cpu/Dockerfile @@ -1,4 +1,4 @@ -FROM onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cpu_x64_ubi8_gcc14:20250108.1 +FROM onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cpu_x64_ubi8_gcc14:20250124.1 ADD scripts /tmp/scripts RUN cd /tmp/scripts && /tmp/scripts/install_centos.sh && rm -rf /tmp/scripts diff --git a/tools/ci_build/github/linux/docker/scripts/lort/requirements.txt b/tools/ci_build/github/linux/docker/scripts/lort/requirements.txt index 99d0e9d10285b..5c8cf707927f9 100644 --- a/tools/ci_build/github/linux/docker/scripts/lort/requirements.txt +++ b/tools/ci_build/github/linux/docker/scripts/lort/requirements.txt @@ -3,7 +3,7 @@ beartype==0.15.0 flatbuffers cerberus h5py -onnx==1.17.0 +onnx==1.17.0 ; python_version < '3.13' # Python dependencies required for pytorch development astunparse expecttest!=0.2.0 diff --git a/tools/ci_build/github/linux/docker/scripts/manylinux/requirements.txt b/tools/ci_build/github/linux/docker/scripts/manylinux/requirements.txt index 1261498679ea0..40dd9637747bf 100644 --- a/tools/ci_build/github/linux/docker/scripts/manylinux/requirements.txt +++ b/tools/ci_build/github/linux/docker/scripts/manylinux/requirements.txt @@ -4,7 +4,7 @@ mypy pytest setuptools>=68.2.2 wheel -onnx==1.17.0 +onnx==1.17.0 ; python_version < '3.13' protobuf==4.21.12 sympy==1.12 ; python_version < '3.9' sympy==1.13 ; python_version >= '3.9' diff --git a/tools/ci_build/github/linux/docker/scripts/requirements.txt b/tools/ci_build/github/linux/docker/scripts/requirements.txt index 157e1658a09a4..ee5cedb73ff04 100644 --- a/tools/ci_build/github/linux/docker/scripts/requirements.txt +++ b/tools/ci_build/github/linux/docker/scripts/requirements.txt @@ -5,7 +5,7 @@ mypy pytest setuptools==69.0.3 wheel==0.42.0 -onnx==1.17.0 +onnx==1.17.0 ; python_version < '3.13' argparse sympy==1.12 flatbuffers diff --git a/tools/ci_build/github/linux/ort_minimal/build_full_ort_and_create_ort_files.sh 
b/tools/ci_build/github/linux/ort_minimal/build_full_ort_and_create_ort_files.sh index aef9793f696b6..614eb3a631e13 100755 --- a/tools/ci_build/github/linux/ort_minimal/build_full_ort_and_create_ort_files.sh +++ b/tools/ci_build/github/linux/ort_minimal/build_full_ort_and_create_ort_files.sh @@ -5,10 +5,10 @@ set -e set -x -export PATH=/opt/python/cp310-cp310/bin:$PATH BUILD_DIR=${1:?"usage: $0 "} +python3 -m pip install -r /onnxruntime_src/tools/ci_build/github/linux/python/requirements.txt # Validate the operator kernel registrations, as the ORT model uses hashes of the kernel registration details # to find kernels. If the hashes from the registration details are incorrect we will produce a model that will break # when the registration is fixed in the future. @@ -26,7 +26,6 @@ python3 /onnxruntime_src/tools/ci_build/build.py \ --build_wheel \ --skip_tests \ --enable_training_ops \ - --enable_pybind --cmake_extra_defines PYTHON_INCLUDE_DIR=/opt/python/cp310-cp310/include/python3.10 PYTHON_LIBRARY=/usr/lib64/librt.so \ --use_nnapi \ --use_coreml diff --git a/tools/ci_build/github/linux/ort_minimal/build_minimal_ort_and_run_tests.sh b/tools/ci_build/github/linux/ort_minimal/build_minimal_ort_and_run_tests.sh index c857d3f1036bc..f5184b20d0a6c 100755 --- a/tools/ci_build/github/linux/ort_minimal/build_minimal_ort_and_run_tests.sh +++ b/tools/ci_build/github/linux/ort_minimal/build_minimal_ort_and_run_tests.sh @@ -7,7 +7,7 @@ set -e set -x -export PATH=/opt/python/cp310-cp310/bin:$PATH + USAGE_TEXT="Usage: -b|--build-directory Specifies the build directory. Required. @@ -65,7 +65,7 @@ if [[ -z "${BUILD_DIR}" || -z "${REDUCED_OPS_CONFIG_FILE}" ]]; then echo "$USAGE_TEXT" exit 1 fi - +python3 -m pip install -r /onnxruntime_src/tools/ci_build/github/linux/python/requirements.txt # Perform a minimal build with required ops and run ORT minimal build UTs python3 /onnxruntime_src/tools/ci_build/build.py \ --build_dir ${BUILD_DIR} --cmake_generator Ninja \ diff --git a/tools/ci_build/github/linux/python/requirements.txt b/tools/ci_build/github/linux/python/requirements.txt index 200b9c2e50288..36b30ed6d3d12 100644 --- a/tools/ci_build/github/linux/python/requirements.txt +++ b/tools/ci_build/github/linux/python/requirements.txt @@ -3,7 +3,7 @@ mypy pytest setuptools>=68.2.2 wheel -onnx==1.17.0 +onnx==1.17.0 ; python_version < '3.13' protobuf==4.21.12 sympy==1.12 flatbuffers diff --git a/tools/ci_build/hipify-perl b/tools/ci_build/hipify-perl new file mode 100755 index 0000000000000..ac1393cf0da7d --- /dev/null +++ b/tools/ci_build/hipify-perl @@ -0,0 +1,13168 @@ +#!/usr/bin/env perl + +## +# Copyright (c) 2015-present Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. +## + +# IMPORTANT: Do not change this file manually: it is generated by hipify-clang --perl + +# USAGE +# +my $USAGE =< \$cuda_kernel_execution_syntax # Keep CUDA kernel launch syntax (default) + , "examine" => \$examine # Combines -no-output and -print-stats options + , "exclude-dirs=s" => \$exclude_dirs # Exclude directories + , "exclude-files=s" => \$exclude_files # Exclude files + , "experimental" => \$experimental # HIPIFY experimentally supported APIs + , "help" => \$help # Display available options + , "hip-kernel-execution-syntax" => \$hip_kernel_execution_syntax # Transform CUDA kernel launch syntax to a regular HIP function call (overrides "--cuda-kernel-execution-syntax") + , "inplace" => \$inplace # Backup the input file in .prehip file, modify the input file inplace + , "no-output" => \$no_output # Don't write any translated output to stdout + , "o=s" => \$hipFileName # Output filename + , "print-stats" => \$print_stats # Print translation statistics + , "quiet-warnings" => \$quiet_warnings # Don't print warnings on unknown CUDA identifiers + , "roc" => \$roc # Translate to roc instead of hip where it is possible + , "version" => \$version # The supported HIP version + , "whitelist=s" => \$whitelist # Whitelist of identifiers +); + +$cuda_kernel_execution_syntax = 1; + +my %deprecated_funcs = ( + "pruneInfo_t" => "12.2", + "pruneInfo" => "12.2", + "nvrtcGetNVVMSize" => "12.0", + "nvrtcGetNVVM" => "12.0", + "cusparseZsctr" => "11.0", + "cusparseZnnz_compress" => "12.2", + "cusparseZhybsv_solve" => "10.2", + "cusparseZhybsv_analysis" => "10.2", + "cusparseZhybmv" => "10.2", + "cusparseZhyb2dense" => "10.2", + "cusparseZhyb2csr" => "10.2", + "cusparseZhyb2csc" => "10.2", + "cusparseZgtsv_nopivot" => "10.2", + "cusparseZgtsvStridedBatch" => "10.2", + "cusparseZgtsv" => "10.2", + "cusparseZgthrz" => "11.0", + "cusparseZgthr" => "11.0", + "cusparseZgemmi" => "11.0", + "cusparseZgebsr2csr" => "12.4", + "cusparseZdoti" => "10.2", + "cusparseZdotci" => "10.2", + "cusparseZdense2hyb" => "10.2", + "cusparseZdense2csr" => "11.1", + "cusparseZdense2csc" => "11.1", + "cusparseZcsru2csr_bufferSizeExt" => "12.2", + "cusparseZcsru2csr" => "12.2", + "cusparseZcsrsv_solve" => "10.2", + "cusparseZcsrsv_analysis" => "10.2", + "cusparseZcsrsv2_solve" => "11.3", + "cusparseZcsrsv2_bufferSizeExt" => "11.3", + "cusparseZcsrsv2_bufferSize" => "11.3", + "cusparseZcsrsv2_analysis" => "11.3", + "cusparseZcsrsm_solve" => "10.2", + "cusparseZcsrsm_analysis" => "10.2", + "cusparseZcsrsm2_solve" => "11.3", + "cusparseZcsrsm2_bufferSizeExt" => "11.3", + "cusparseZcsrsm2_analysis" => "11.3", + "cusparseZcsrmv_mp" => "10.2", + "cusparseZcsrmv" => "10.2", + "cusparseZcsrmm2" => "10.2", + "cusparseZcsrmm" => "10.2", + "cusparseZcsrilu02_numericBoost" => "12.2", + "cusparseZcsrilu02_bufferSizeExt" => "12.2", + "cusparseZcsrilu02_bufferSize" => "12.2", + "cusparseZcsrilu02_analysis" => "12.2", + "cusparseZcsrilu02" => "12.2", + "cusparseZcsrilu0" => "10.2", + "cusparseZcsric02_bufferSizeExt" => "12.2", + "cusparseZcsric02_bufferSize" => "12.2", + "cusparseZcsric02_analysis" => "12.2", + "cusparseZcsric02" => "12.2", + "cusparseZcsric0" => "10.2", + "cusparseZcsrgemm2_bufferSizeExt" => "11.0", + "cusparseZcsrgemm2" => "11.0", + "cusparseZcsrgemm" => 
"10.2", + "cusparseZcsrgeam" => "10.2", + "cusparseZcsrcolor" => "12.2", + "cusparseZcsr2hyb" => "10.2", + "cusparseZcsr2dense" => "11.1", + "cusparseZcsr2csru" => "12.2", + "cusparseZcsr2csr_compress" => "12.2", + "cusparseZcsr2csc" => "10.2", + "cusparseZcsr2bsr" => "12.4", + "cusparseZcsc2hyb" => "10.2", + "cusparseZcsc2dense" => "11.1", + "cusparseZbsrxmv" => "12.2", + "cusparseZbsrsv2_solve" => "12.2", + "cusparseZbsrsv2_bufferSizeExt" => "12.2", + "cusparseZbsrsv2_bufferSize" => "12.2", + "cusparseZbsrsv2_analysis" => "12.2", + "cusparseZbsrsm2_solve" => "12.2", + "cusparseZbsrsm2_bufferSizeExt" => "12.2", + "cusparseZbsrsm2_bufferSize" => "12.2", + "cusparseZbsrsm2_analysis" => "12.2", + "cusparseZbsrilu02_numericBoost" => "12.2", + "cusparseZbsrilu02_bufferSizeExt" => "12.2", + "cusparseZbsrilu02_bufferSize" => "12.2", + "cusparseZbsrilu02_analysis" => "12.2", + "cusparseZbsrilu02" => "12.2", + "cusparseZbsric02_bufferSizeExt" => "12.2", + "cusparseZbsric02_bufferSize" => "12.2", + "cusparseZbsric02_analysis" => "12.2", + "cusparseZbsric02" => "12.2", + "cusparseZaxpyi" => "11.0", + "cusparseXgebsr2csr" => "12.4", + "cusparseXcsrsv2_zeroPivot" => "11.3", + "cusparseXcsrsm2_zeroPivot" => "11.3", + "cusparseXcsrilu02_zeroPivot" => "12.2", + "cusparseXcsric02_zeroPivot" => "12.2", + "cusparseXcsrgemmNnz" => "10.2", + "cusparseXcsrgemm2Nnz" => "11.0", + "cusparseXcsrgeamNnz" => "10.2", + "cusparseXcsr2bsrNnz" => "12.4", + "cusparseXbsrsv2_zeroPivot" => "12.2", + "cusparseXbsrsm2_zeroPivot" => "12.2", + "cusparseXbsrilu02_zeroPivot" => "12.2", + "cusparseXbsric02_zeroPivot" => "12.2", + "cusparseSsctr" => "11.0", + "cusparseSroti" => "11.0", + "cusparseSpruneDense2csr_bufferSizeExt" => "12.2", + "cusparseSpruneDense2csrNnzByPercentage" => "12.2", + "cusparseSpruneDense2csrNnz" => "12.2", + "cusparseSpruneDense2csrByPercentage_bufferSizeExt" => "12.2", + "cusparseSpruneDense2csrByPercentage" => "12.2", + "cusparseSpruneDense2csr" => "12.2", + "cusparseSpruneCsr2csr_bufferSizeExt" => "12.2", + "cusparseSpruneCsr2csrNnzByPercentage" => "12.2", + "cusparseSpruneCsr2csrNnz" => "12.2", + "cusparseSpruneCsr2csrByPercentage_bufferSizeExt" => "12.2", + "cusparseSpruneCsr2csrByPercentage" => "12.2", + "cusparseSpruneCsr2csr" => "12.2", + "cusparseSolvePolicy_t" => "12.2", + "cusparseSolveAnalysisInfo_t" => "10.2", + "cusparseSolveAnalysisInfo" => "10.2", + "cusparseSnnz_compress" => "12.2", + "cusparseShybsv_solve" => "10.2", + "cusparseShybsv_analysis" => "10.2", + "cusparseShybmv" => "10.2", + "cusparseShyb2dense" => "10.2", + "cusparseShyb2csr" => "10.2", + "cusparseShyb2csc" => "10.2", + "cusparseSgtsv_nopivot" => "10.2", + "cusparseSgtsvStridedBatch" => "10.2", + "cusparseSgtsv" => "10.2", + "cusparseSgthrz" => "11.0", + "cusparseSgthr" => "11.0", + "cusparseSgemmi" => "11.0", + "cusparseSgebsr2csr" => "12.4", + "cusparseSdoti" => "10.2", + "cusparseSdense2hyb" => "10.2", + "cusparseSdense2csr" => "11.1", + "cusparseSdense2csc" => "11.1", + "cusparseScsru2csr_bufferSizeExt" => "12.2", + "cusparseScsru2csr" => "12.2", + "cusparseScsrsv_solve" => "10.2", + "cusparseScsrsv_analysis" => "10.2", + "cusparseScsrsv2_solve" => "11.3", + "cusparseScsrsv2_bufferSizeExt" => "11.3", + "cusparseScsrsv2_bufferSize" => "11.3", + "cusparseScsrsv2_analysis" => "11.3", + "cusparseScsrsm_solve" => "10.2", + "cusparseScsrsm_analysis" => "10.2", + "cusparseScsrsm2_solve" => "11.3", + "cusparseScsrsm2_bufferSizeExt" => "11.3", + "cusparseScsrsm2_analysis" => "11.3", + "cusparseScsrmv_mp" => "10.2", + 
"cusparseScsrmv" => "10.2", + "cusparseScsrmm2" => "10.2", + "cusparseScsrmm" => "10.2", + "cusparseScsrilu02_numericBoost" => "12.2", + "cusparseScsrilu02_bufferSizeExt" => "12.2", + "cusparseScsrilu02_bufferSize" => "12.2", + "cusparseScsrilu02_analysis" => "12.2", + "cusparseScsrilu02" => "12.2", + "cusparseScsrilu0" => "10.2", + "cusparseScsric02_bufferSizeExt" => "12.2", + "cusparseScsric02_bufferSize" => "12.2", + "cusparseScsric02_analysis" => "12.2", + "cusparseScsric02" => "12.2", + "cusparseScsric0" => "10.2", + "cusparseScsrgemm2_bufferSizeExt" => "11.0", + "cusparseScsrgemm2" => "11.0", + "cusparseScsrgemm" => "10.2", + "cusparseScsrgeam" => "10.2", + "cusparseScsrcolor" => "12.2", + "cusparseScsr2hyb" => "10.2", + "cusparseScsr2dense" => "11.1", + "cusparseScsr2csru" => "12.2", + "cusparseScsr2csr_compress" => "12.2", + "cusparseScsr2csc" => "10.2", + "cusparseScsr2bsr" => "12.4", + "cusparseScsc2hyb" => "10.2", + "cusparseScsc2dense" => "11.1", + "cusparseSbsrxmv" => "12.2", + "cusparseSbsrsv2_solve" => "12.2", + "cusparseSbsrsv2_bufferSizeExt" => "12.2", + "cusparseSbsrsv2_bufferSize" => "12.2", + "cusparseSbsrsv2_analysis" => "12.2", + "cusparseSbsrsm2_solve" => "12.2", + "cusparseSbsrsm2_bufferSizeExt" => "12.2", + "cusparseSbsrsm2_bufferSize" => "12.2", + "cusparseSbsrsm2_analysis" => "12.2", + "cusparseSbsrilu02_numericBoost" => "12.2", + "cusparseSbsrilu02_bufferSizeExt" => "12.2", + "cusparseSbsrilu02_bufferSize" => "12.2", + "cusparseSbsrilu02_analysis" => "12.2", + "cusparseSbsrilu02" => "12.2", + "cusparseSbsric02_bufferSizeExt" => "12.2", + "cusparseSbsric02_bufferSize" => "12.2", + "cusparseSbsric02_analysis" => "12.2", + "cusparseSbsric02" => "12.2", + "cusparseSaxpyi" => "11.0", + "cusparseRot" => "12.2", + "cusparseHybPartition_t" => "10.2", + "cusparseHybMat_t" => "10.2", + "cusparseHybMat" => "10.2", + "cusparseHpruneDense2csr_bufferSizeExt" => "12.2", + "cusparseHpruneDense2csrNnzByPercentage" => "12.2", + "cusparseHpruneDense2csrNnz" => "12.2", + "cusparseHpruneDense2csrByPercentage_bufferSizeExt" => "12.2", + "cusparseHpruneDense2csrByPercentage" => "12.2", + "cusparseHpruneDense2csr" => "12.2", + "cusparseHpruneCsr2csr_bufferSizeExt" => "12.2", + "cusparseHpruneCsr2csrNnzByPercentage" => "12.2", + "cusparseHpruneCsr2csrNnz" => "12.2", + "cusparseHpruneCsr2csrByPercentage_bufferSizeExt" => "12.2", + "cusparseHpruneCsr2csrByPercentage" => "12.2", + "cusparseHpruneCsr2csr" => "12.2", + "cusparseDsctr" => "11.0", + "cusparseDroti" => "11.0", + "cusparseDpruneDense2csr_bufferSizeExt" => "12.2", + "cusparseDpruneDense2csrNnzByPercentage" => "12.2", + "cusparseDpruneDense2csrNnz" => "12.2", + "cusparseDpruneDense2csrByPercentage_bufferSizeExt" => "12.2", + "cusparseDpruneDense2csrByPercentage" => "12.2", + "cusparseDpruneDense2csr" => "12.2", + "cusparseDpruneCsr2csr_bufferSizeExt" => "12.2", + "cusparseDpruneCsr2csrNnzByPercentage" => "12.2", + "cusparseDpruneCsr2csrNnz" => "12.2", + "cusparseDpruneCsr2csrByPercentage_bufferSizeExt" => "12.2", + "cusparseDpruneCsr2csrByPercentage" => "12.2", + "cusparseDpruneCsr2csr" => "12.2", + "cusparseDnnz_compress" => "12.2", + "cusparseDhybsv_solve" => "10.2", + "cusparseDhybsv_analysis" => "10.2", + "cusparseDhybmv" => "10.2", + "cusparseDhyb2dense" => "10.2", + "cusparseDhyb2csr" => "10.2", + "cusparseDhyb2csc" => "10.2", + "cusparseDgtsv_nopivot" => "10.2", + "cusparseDgtsvStridedBatch" => "10.2", + "cusparseDgtsv" => "10.2", + "cusparseDgthrz" => "11.0", + "cusparseDgthr" => "11.0", + "cusparseDgemmi" => "11.0", + 
"cusparseDgebsr2csr" => "12.4", + "cusparseDestroySolveAnalysisInfo" => "10.2", + "cusparseDestroyPruneInfo" => "12.2", + "cusparseDestroyHybMat" => "10.2", + "cusparseDestroyCsru2csrInfo" => "12.2", + "cusparseDestroyCsrsv2Info" => "11.3", + "cusparseDestroyCsrsm2Info" => "11.3", + "cusparseDestroyCsrilu02Info" => "12.2", + "cusparseDestroyCsric02Info" => "12.2", + "cusparseDestroyCsrgemm2Info" => "11.0", + "cusparseDestroyColorInfo" => "12.2", + "cusparseDestroyBsrsv2Info" => "12.2", + "cusparseDestroyBsrsm2Info" => "12.2", + "cusparseDestroyBsrilu02Info" => "12.2", + "cusparseDestroyBsric02Info" => "12.2", + "cusparseDdoti" => "10.2", + "cusparseDdense2hyb" => "10.2", + "cusparseDdense2csr" => "11.1", + "cusparseDdense2csc" => "11.1", + "cusparseDcsru2csr_bufferSizeExt" => "12.2", + "cusparseDcsru2csr" => "12.2", + "cusparseDcsrsv_solve" => "10.2", + "cusparseDcsrsv_analysis" => "10.2", + "cusparseDcsrsv2_solve" => "11.3", + "cusparseDcsrsv2_bufferSizeExt" => "11.3", + "cusparseDcsrsv2_bufferSize" => "11.3", + "cusparseDcsrsv2_analysis" => "11.3", + "cusparseDcsrsm_solve" => "10.2", + "cusparseDcsrsm_analysis" => "10.2", + "cusparseDcsrsm2_solve" => "11.3", + "cusparseDcsrsm2_bufferSizeExt" => "11.3", + "cusparseDcsrsm2_analysis" => "11.3", + "cusparseDcsrmv_mp" => "10.2", + "cusparseDcsrmv" => "10.2", + "cusparseDcsrmm2" => "10.2", + "cusparseDcsrmm" => "10.2", + "cusparseDcsrilu02_numericBoost" => "12.2", + "cusparseDcsrilu02_bufferSizeExt" => "12.2", + "cusparseDcsrilu02_bufferSize" => "12.2", + "cusparseDcsrilu02_analysis" => "12.2", + "cusparseDcsrilu02" => "12.2", + "cusparseDcsrilu0" => "10.2", + "cusparseDcsric02_bufferSizeExt" => "12.2", + "cusparseDcsric02_bufferSize" => "12.2", + "cusparseDcsric02_analysis" => "12.2", + "cusparseDcsric02" => "12.2", + "cusparseDcsric0" => "10.2", + "cusparseDcsrgemm2_bufferSizeExt" => "11.0", + "cusparseDcsrgemm2" => "11.0", + "cusparseDcsrgemm" => "10.2", + "cusparseDcsrgeam" => "10.2", + "cusparseDcsrcolor" => "12.2", + "cusparseDcsr2hyb" => "10.2", + "cusparseDcsr2dense" => "11.1", + "cusparseDcsr2csru" => "12.2", + "cusparseDcsr2csr_compress" => "12.2", + "cusparseDcsr2csc" => "10.2", + "cusparseDcsr2bsr" => "12.4", + "cusparseDcsc2hyb" => "10.2", + "cusparseDcsc2dense" => "11.1", + "cusparseDbsrxmv" => "12.2", + "cusparseDbsrsv2_solve" => "12.2", + "cusparseDbsrsv2_bufferSizeExt" => "12.2", + "cusparseDbsrsv2_bufferSize" => "12.2", + "cusparseDbsrsv2_analysis" => "12.2", + "cusparseDbsrsm2_solve" => "12.2", + "cusparseDbsrsm2_bufferSizeExt" => "12.2", + "cusparseDbsrsm2_bufferSize" => "12.2", + "cusparseDbsrsm2_analysis" => "12.2", + "cusparseDbsrilu02_numericBoost" => "12.2", + "cusparseDbsrilu02_bufferSizeExt" => "12.2", + "cusparseDbsrilu02_bufferSize" => "12.2", + "cusparseDbsrilu02_analysis" => "12.2", + "cusparseDbsrilu02" => "12.2", + "cusparseDbsric02_bufferSizeExt" => "12.2", + "cusparseDbsric02_bufferSize" => "12.2", + "cusparseDbsric02_analysis" => "12.2", + "cusparseDbsric02" => "12.2", + "cusparseDaxpyi" => "11.0", + "cusparseCsrsv_solveEx" => "10.2", + "cusparseCsrsv_analysisEx" => "10.2", + "cusparseCsrmvEx_bufferSize" => "11.2", + "cusparseCsrmvEx" => "11.2", + "cusparseCsrilu0Ex" => "10.2", + "cusparseCsr2cscEx" => "10.2", + "cusparseCsctr" => "11.0", + "cusparseCreateSolveAnalysisInfo" => "10.2", + "cusparseCreatePruneInfo" => "12.2", + "cusparseCreateIdentityPermutation" => "12.2", + "cusparseCreateHybMat" => "10.2", + "cusparseCreateCsru2csrInfo" => "12.2", + "cusparseCreateCsrsv2Info" => "11.3", + 
"cusparseCreateCsrsm2Info" => "11.3", + "cusparseCreateCsrilu02Info" => "12.2", + "cusparseCreateCsric02Info" => "12.2", + "cusparseCreateCsrgemm2Info" => "11.0", + "cusparseCreateCooAoS" => "11.2", + "cusparseCreateColorInfo" => "12.2", + "cusparseCreateBsrsv2Info" => "12.2", + "cusparseCreateBsrsm2Info" => "12.2", + "cusparseCreateBsrilu02Info" => "12.2", + "cusparseCreateBsric02Info" => "12.2", + "cusparseCooAoSGet" => "11.2", + "cusparseConstrainedGeMM_bufferSize" => "11.2", + "cusparseConstrainedGeMM" => "11.2", + "cusparseColorInfo_t" => "12.2", + "cusparseColorInfo" => "12.2", + "cusparseColorAlg_t" => "12.2", + "cusparseCnnz_compress" => "12.2", + "cusparseChybsv_solve" => "10.2", + "cusparseChybsv_analysis" => "10.2", + "cusparseChybmv" => "10.2", + "cusparseChyb2dense" => "10.2", + "cusparseChyb2csr" => "10.2", + "cusparseChyb2csc" => "10.2", + "cusparseCgtsv_nopivot" => "10.2", + "cusparseCgtsvStridedBatch" => "10.2", + "cusparseCgtsv" => "10.2", + "cusparseCgthrz" => "11.0", + "cusparseCgthr" => "11.0", + "cusparseCgemmi" => "11.0", + "cusparseCgebsr2csr" => "12.4", + "cusparseCdoti" => "10.2", + "cusparseCdotci" => "10.2", + "cusparseCdense2hyb" => "10.2", + "cusparseCdense2csr" => "11.1", + "cusparseCdense2csc" => "11.1", + "cusparseCcsru2csr_bufferSizeExt" => "12.2", + "cusparseCcsru2csr" => "12.2", + "cusparseCcsrsv_solve" => "10.2", + "cusparseCcsrsv_analysis" => "10.2", + "cusparseCcsrsv2_solve" => "11.3", + "cusparseCcsrsv2_bufferSizeExt" => "11.3", + "cusparseCcsrsv2_bufferSize" => "11.3", + "cusparseCcsrsv2_analysis" => "11.3", + "cusparseCcsrsm_solve" => "10.2", + "cusparseCcsrsm_analysis" => "10.2", + "cusparseCcsrsm2_solve" => "11.3", + "cusparseCcsrsm2_bufferSizeExt" => "11.3", + "cusparseCcsrsm2_analysis" => "11.3", + "cusparseCcsrmv_mp" => "10.2", + "cusparseCcsrmv" => "10.2", + "cusparseCcsrmm2" => "10.2", + "cusparseCcsrmm" => "10.2", + "cusparseCcsrilu02_numericBoost" => "12.2", + "cusparseCcsrilu02_bufferSizeExt" => "12.2", + "cusparseCcsrilu02_bufferSize" => "12.2", + "cusparseCcsrilu02_analysis" => "12.2", + "cusparseCcsrilu02" => "12.2", + "cusparseCcsrilu0" => "10.2", + "cusparseCcsric02_bufferSizeExt" => "12.2", + "cusparseCcsric02_bufferSize" => "12.2", + "cusparseCcsric02_analysis" => "12.2", + "cusparseCcsric02" => "12.2", + "cusparseCcsric0" => "10.2", + "cusparseCcsrgemm2_bufferSizeExt" => "11.0", + "cusparseCcsrgemm2" => "11.0", + "cusparseCcsrgemm" => "10.2", + "cusparseCcsrgeam" => "10.2", + "cusparseCcsrcolor" => "12.2", + "cusparseCcsr2hyb" => "10.2", + "cusparseCcsr2dense" => "11.1", + "cusparseCcsr2csru" => "12.2", + "cusparseCcsr2csr_compress" => "12.2", + "cusparseCcsr2csc" => "10.2", + "cusparseCcsr2bsr" => "12.4", + "cusparseCcsc2hyb" => "10.2", + "cusparseCcsc2dense" => "11.1", + "cusparseCbsrxmv" => "12.2", + "cusparseCbsrsv2_solve" => "12.2", + "cusparseCbsrsv2_bufferSizeExt" => "12.2", + "cusparseCbsrsv2_bufferSize" => "12.2", + "cusparseCbsrsv2_analysis" => "12.2", + "cusparseCbsrsm2_solve" => "12.2", + "cusparseCbsrsm2_bufferSizeExt" => "12.2", + "cusparseCbsrsm2_bufferSize" => "12.2", + "cusparseCbsrsm2_analysis" => "12.2", + "cusparseCbsrilu02_numericBoost" => "12.2", + "cusparseCbsrilu02_bufferSizeExt" => "12.2", + "cusparseCbsrilu02_bufferSize" => "12.2", + "cusparseCbsrilu02_analysis" => "12.2", + "cusparseCbsrilu02" => "12.2", + "cusparseCbsric02_bufferSizeExt" => "12.2", + "cusparseCbsric02_bufferSize" => "12.2", + "cusparseCbsric02_analysis" => "12.2", + "cusparseCbsric02" => "12.2", + "cusparseCaxpyi" => "11.0", + 
"cudnnTransformTensorEx" => "9.0.0", + "cudnnTransformTensor" => "9.0.0", + "cudnnTransformFilter" => "9.0.0", + "cudnnTensorTransformStruct" => "9.0.0", + "cudnnTensorTransformDescriptor_t" => "9.0.0", + "cudnnSetTensorTransformDescriptor" => "9.0.0", + "cudnnSetSeqDataDescriptor" => "9.0.0", + "cudnnSetReduceTensorDescriptor" => "9.0.0", + "cudnnSetRNNProjectionLayers" => "8.0.1", + "cudnnSetRNNPaddingMode" => "8.0.1", + "cudnnSetRNNMatrixMathType" => "8.0.1", + "cudnnSetRNNDescriptor_v6" => "8.0.1", + "cudnnSetRNNDescriptor_v5" => "7.6.5", + "cudnnSetRNNDescriptor" => "7.6.5", + "cudnnSetRNNBiasMode" => "8.0.1", + "cudnnSetRNNAlgorithmDescriptor" => "8.0.2", + "cudnnSetPoolingNdDescriptor" => "9.0.0", + "cudnnSetPooling2dDescriptor" => "9.0.0", + "cudnnSetPersistentRNNPlan" => "8.0.1", + "cudnnSetOpTensorDescriptor" => "9.0.0", + "cudnnSetFusedOpsVariantParamPackAttribute" => "9.0.0", + "cudnnSetFusedOpsConstParamPackAttribute" => "9.0.0", + "cudnnSetFilterNdDescriptor" => "9.0.0", + "cudnnSetFilter4dDescriptor" => "9.0.0", + "cudnnSetConvolutionReorderType" => "9.0.0", + "cudnnSetConvolutionNdDescriptor" => "9.0.0", + "cudnnSetConvolutionMathType" => "9.0.0", + "cudnnSetConvolutionGroupCount" => "9.0.0", + "cudnnSetConvolution2dDescriptor" => "9.0.0", + "cudnnSetCTCLossDescriptor_v8" => "9.0.0", + "cudnnSetCTCLossDescriptorEx" => "9.0.0", + "cudnnSetCTCLossDescriptor" => "9.0.0", + "cudnnSetAttnDescriptor" => "9.0.0", + "cudnnSetAlgorithmPerformance" => "8.0.2", + "cudnnSetAlgorithmDescriptor" => "8.0.2", + "cudnnSetActivationDescriptorSwishBeta" => "9.0.0", + "cudnnSetActivationDescriptor" => "9.0.0", + "cudnnSeqDataDescriptor_t" => "9.0.0", + "cudnnScaleTensor" => "9.0.0", + "cudnnSaveAlgorithm" => "8.0.2", + "cudnnRuntimeTag_t" => "9.0.0", + "cudnnRestoreAlgorithm" => "8.0.2", + "cudnnReorderType_t" => "9.0.0", + "cudnnReorderFilterAndBias" => "9.0.0", + "cudnnReduceTensorStruct" => "9.0.0", + "cudnnReduceTensorIndices_t" => "9.0.0", + "cudnnReduceTensorDescriptor_t" => "9.0.0", + "cudnnReduceTensor" => "9.0.0", + "cudnnRNNSetClip" => "8.0.1", + "cudnnRNNGetClip" => "8.0.1", + "cudnnRNNForwardTrainingEx" => "8.0.1", + "cudnnRNNForwardTraining" => "8.0.1", + "cudnnRNNForwardInferenceEx" => "8.0.1", + "cudnnRNNForwardInference" => "8.0.1", + "cudnnRNNBackwardWeightsEx" => "8.0.2", + "cudnnRNNBackwardWeights" => "8.0.2", + "cudnnRNNBackwardDataEx" => "8.0.2", + "cudnnRNNBackwardData" => "8.0.2", + "cudnnQueryRuntimeError" => "9.0.0", + "cudnnPoolingStruct" => "9.0.0", + "cudnnPoolingMode_t" => "9.0.0", + "cudnnPoolingForward" => "9.0.0", + "cudnnPoolingDescriptor_t" => "9.0.0", + "cudnnPoolingBackward" => "9.0.0", + "cudnnOpTensorStruct" => "9.0.0", + "cudnnOpTensorDescriptor_t" => "9.0.0", + "cudnnOpTensor" => "9.0.0", + "cudnnNormalizationForwardTraining" => "9.0.0", + "cudnnNormalizationForwardInference" => "9.0.0", + "cudnnNormalizationBackward" => "9.0.0", + "cudnnNormOps_t" => "9.0.0", + "cudnnNormMode_t" => "9.0.0", + "cudnnNormAlgo_t" => "9.0.0", + "cudnnMultiHeadAttnForward" => "9.0.0", + "cudnnMultiHeadAttnBackwardWeights" => "9.0.0", + "cudnnMultiHeadAttnBackwardData" => "9.0.0", + "cudnnMakeFusedOpsPlan" => "9.0.0", + "cudnnInitTransformDest" => "9.0.0", + "cudnnIndicesType_t" => "9.0.0", + "cudnnIm2Col" => "9.0.0", + "cudnnGetTensorTransformDescriptor" => "9.0.0", + "cudnnGetSeqDataDescriptor" => "9.0.0", + "cudnnGetReductionWorkspaceSize" => "9.0.0", + "cudnnGetReductionIndicesSize" => "9.0.0", + "cudnnGetReduceTensorDescriptor" => "9.0.0", + "cudnnGetRNNWorkspaceSize" => 
"8.0.1", + "cudnnGetRNNTrainingReserveSize" => "8.0.1", + "cudnnGetRNNProjectionLayers" => "8.0.1", + "cudnnGetRNNParamsSize" => "8.0.1", + "cudnnGetRNNPaddingMode" => "8.0.1", + "cudnnGetRNNMatrixMathType" => "8.0.1", + "cudnnGetRNNLinLayerMatrixParams" => "8.0.1", + "cudnnGetRNNLinLayerBiasParams" => "8.0.1", + "cudnnGetRNNForwardTrainingAlgorithmMaxCount" => "8.0.2", + "cudnnGetRNNForwardInferenceAlgorithmMaxCount" => "8.0.2", + "cudnnGetRNNDescriptor_v6" => "8.0.1", + "cudnnGetRNNDescriptor" => "7.6.5", + "cudnnGetRNNBiasMode" => "8.0.1", + "cudnnGetRNNBackwardWeightsAlgorithmMaxCount" => "8.0.2", + "cudnnGetRNNBackwardDataAlgorithmMaxCount" => "8.0.2", + "cudnnGetPoolingNdForwardOutputDim" => "9.0.0", + "cudnnGetPoolingNdDescriptor" => "9.0.0", + "cudnnGetPooling2dForwardOutputDim" => "9.0.0", + "cudnnGetPooling2dDescriptor" => "9.0.0", + "cudnnGetOpTensorDescriptor" => "9.0.0", + "cudnnGetNormalizationTrainingReserveSpaceSize" => "9.0.0", + "cudnnGetNormalizationForwardTrainingWorkspaceSize" => "9.0.0", + "cudnnGetNormalizationBackwardWorkspaceSize" => "9.0.0", + "cudnnGetMultiHeadAttnWeights" => "9.0.0", + "cudnnGetMultiHeadAttnBuffers" => "9.0.0", + "cudnnGetFusedOpsVariantParamPackAttribute" => "9.0.0", + "cudnnGetFusedOpsConstParamPackAttribute" => "9.0.0", + "cudnnGetFoldedConvBackwardDataDescriptors" => "9.0.0", + "cudnnGetFilterSizeInBytes" => "9.0.0", + "cudnnGetFilterNdDescriptor" => "9.0.0", + "cudnnGetFilter4dDescriptor" => "9.0.0", + "cudnnGetConvolutionReorderType" => "9.0.0", + "cudnnGetConvolutionNdForwardOutputDim" => "9.0.0", + "cudnnGetConvolutionNdDescriptor" => "9.0.0", + "cudnnGetConvolutionMathType" => "9.0.0", + "cudnnGetConvolutionGroupCount" => "9.0.0", + "cudnnGetConvolutionForwardWorkspaceSize" => "9.0.0", + "cudnnGetConvolutionForwardAlgorithm_v7" => "9.0.0", + "cudnnGetConvolutionForwardAlgorithmMaxCount" => "9.0.0", + "cudnnGetConvolutionForwardAlgorithm" => "7.6.5", + "cudnnGetConvolutionBackwardFilterWorkspaceSize" => "9.0.0", + "cudnnGetConvolutionBackwardFilterAlgorithm_v7" => "9.0.0", + "cudnnGetConvolutionBackwardFilterAlgorithm" => "7.6.5", + "cudnnGetConvolutionBackwardDataWorkspaceSize" => "9.0.0", + "cudnnGetConvolutionBackwardDataAlgorithm_v7" => "9.0.0", + "cudnnGetConvolutionBackwardDataAlgorithmMaxCount" => "9.0.0", + "cudnnGetConvolutionBackwardDataAlgorithm" => "7.6.5", + "cudnnGetConvolution2dForwardOutputDim" => "9.0.0", + "cudnnGetConvolution2dDescriptor" => "9.0.0", + "cudnnGetCTCLossDescriptor_v8" => "9.0.0", + "cudnnGetCTCLossDescriptorEx" => "9.0.0", + "cudnnGetCTCLossDescriptor" => "9.0.0", + "cudnnGetBatchNormalizationTrainingExReserveSpaceSize" => "9.0.0", + "cudnnGetBatchNormalizationForwardTrainingExWorkspaceSize" => "9.0.0", + "cudnnGetBatchNormalizationBackwardExWorkspaceSize" => "9.0.0", + "cudnnGetAttnDescriptor" => "9.0.0", + "cudnnGetAlgorithmSpaceSize" => "8.0.2", + "cudnnGetAlgorithmPerformance" => "8.0.2", + "cudnnGetAlgorithmDescriptor" => "8.0.2", + "cudnnGetActivationDescriptorSwishBeta" => "9.0.0", + "cudnnGetActivationDescriptor" => "9.0.0", + "cudnnFusedOps_t" => "9.0.0", + "cudnnFusedOpsVariantParamStruct" => "9.0.0", + "cudnnFusedOpsVariantParamPack_t" => "9.0.0", + "cudnnFusedOpsVariantParamLabel_t" => "9.0.0", + "cudnnFusedOpsPointerPlaceHolder_t" => "9.0.0", + "cudnnFusedOpsPlan_t" => "9.0.0", + "cudnnFusedOpsPlanStruct" => "9.0.0", + "cudnnFusedOpsExecute" => "9.0.0", + "cudnnFusedOpsConstParamStruct" => "9.0.0", + "cudnnFusedOpsConstParamPack_t" => "9.0.0", + "cudnnFusedOpsConstParamLabel_t" => "9.0.0", + 
"cudnnFindRNNForwardTrainingAlgorithmEx" => "8.0.2", + "cudnnFindRNNForwardInferenceAlgorithmEx" => "8.0.2", + "cudnnFindRNNBackwardWeightsAlgorithmEx" => "8.0.2", + "cudnnFindRNNBackwardDataAlgorithmEx" => "8.0.2", + "cudnnFindConvolutionForwardAlgorithmEx" => "9.0.0", + "cudnnFindConvolutionForwardAlgorithm" => "9.0.0", + "cudnnFindConvolutionBackwardFilterAlgorithmEx" => "9.0.0", + "cudnnFindConvolutionBackwardFilterAlgorithm" => "9.0.0", + "cudnnFindConvolutionBackwardDataAlgorithmEx" => "9.0.0", + "cudnnFindConvolutionBackwardDataAlgorithm" => "9.0.0", + "cudnnFilterStruct" => "9.0.0", + "cudnnDestroyTensorTransformDescriptor" => "9.0.0", + "cudnnDestroySeqDataDescriptor" => "9.0.0", + "cudnnDestroyReduceTensorDescriptor" => "9.0.0", + "cudnnDestroyPoolingDescriptor" => "9.0.0", + "cudnnDestroyPersistentRNNPlan" => "8.0.1", + "cudnnDestroyOpTensorDescriptor" => "9.0.0", + "cudnnDestroyFusedOpsVariantParamPack" => "9.0.0", + "cudnnDestroyFusedOpsPlan" => "9.0.0", + "cudnnDestroyFusedOpsConstParamPack" => "9.0.0", + "cudnnDestroyFilterDescriptor" => "9.0.0", + "cudnnDestroyConvolutionDescriptor" => "9.0.0", + "cudnnDestroyAttnDescriptor" => "9.0.0", + "cudnnDestroyAlgorithmPerformance" => "8.0.2", + "cudnnDestroyAlgorithmDescriptor" => "8.0.2", + "cudnnDestroyActivationDescriptor" => "9.0.0", + "cudnnDeriveNormTensorDescriptor" => "9.0.0", + "cudnnDeriveBNTensorDescriptor" => "9.0.0", + "cudnnCreateTensorTransformDescriptor" => "9.0.0", + "cudnnCreateSeqDataDescriptor" => "9.0.0", + "cudnnCreateReduceTensorDescriptor" => "9.0.0", + "cudnnCreatePoolingDescriptor" => "9.0.0", + "cudnnCreatePersistentRNNPlan" => "8.0.1", + "cudnnCreateOpTensorDescriptor" => "9.0.0", + "cudnnCreateFusedOpsVariantParamPack" => "9.0.0", + "cudnnCreateFusedOpsPlan" => "9.0.0", + "cudnnCreateFusedOpsConstParamPack" => "9.0.0", + "cudnnCreateFilterDescriptor" => "9.0.0", + "cudnnCreateConvolutionDescriptor" => "9.0.0", + "cudnnCreateAttnDescriptor" => "9.0.0", + "cudnnCreateAlgorithmPerformance" => "8.0.2", + "cudnnCreateAlgorithmDescriptor" => "8.0.2", + "cudnnCreateActivationDescriptor" => "9.0.0", + "cudnnCopyAlgorithmDescriptor" => "8.0.2", + "cudnnConvolutionFwdPreference_t" => "7.6.5", + "cudnnConvolutionFwdAlgoPerf_t" => "9.0.0", + "cudnnConvolutionForward" => "9.0.0", + "cudnnConvolutionDescriptor_t" => "9.0.0", + "cudnnConvolutionBwdFilterPreference_t" => "7.6.5", + "cudnnConvolutionBwdFilterAlgoPerf_t" => "9.0.0", + "cudnnConvolutionBwdDataPreference_t" => "7.6.5", + "cudnnConvolutionBwdDataAlgoPerf_t" => "9.0.0", + "cudnnConvolutionBwdDataAlgoPerfStruct" => "9.0.0", + "cudnnConvolutionBiasActivationForward" => "9.0.0", + "cudnnConvolutionBackwardFilter" => "9.0.0", + "cudnnConvolutionBackwardData" => "9.0.0", + "cudnnConvolutionBackwardBias" => "9.0.0", + "cudnnBatchNormalizationForwardTrainingEx" => "9.0.0", + "cudnnBatchNormalizationForwardTraining" => "9.0.0", + "cudnnBatchNormalizationForwardInference" => "9.0.0", + "cudnnBatchNormalizationBackwardEx" => "9.0.0", + "cudnnBatchNormalizationBackward" => "9.0.0", + "cudnnBatchNormOps_t" => "9.0.0", + "cudnnBatchNormMode_t" => "9.0.0", + "cudnnAttnDescriptor_t" => "9.0.0", + "cudnnAddTensor" => "9.0.0", + "cudnnActivationStruct" => "9.0.0", + "cudnnActivationMode_t" => "9.0.0", + "cudnnActivationForward" => "9.0.0", + "cudnnActivationDescriptor_t" => "9.0.0", + "cudnnActivationBackward" => "9.0.0", + "cudaThreadSynchronize" => "10.0", + "cudaThreadSetLimit" => "10.0", + "cudaThreadSetCacheConfig" => "10.0", + "cudaThreadGetLimit" => "10.0", + 
"cudaThreadGetCacheConfig" => "10.0", + "cudaThreadExit" => "10.0", + "cudaSharedMemConfig" => "12.4", + "cudaSetDoubleForHost" => "10.0", + "cudaSetDoubleForDevice" => "10.0", + "cudaMemcpyToArrayAsync" => "10.1", + "cudaMemcpyToArray" => "10.1", + "cudaMemcpyFromArrayAsync" => "10.1", + "cudaMemcpyFromArray" => "10.1", + "cudaMemcpyArrayToArray" => "10.1", + "cudaLaunchCooperativeKernelMultiDevice" => "11.3", + "cudaGLUnregisterBufferObject" => "10.0", + "cudaGLUnmapBufferObjectAsync" => "10.0", + "cudaGLUnmapBufferObject" => "10.0", + "cudaGLSetGLDevice" => "10.0", + "cudaGLSetBufferObjectMapFlags" => "10.0", + "cudaGLRegisterBufferObject" => "10.0", + "cudaGLMapBufferObjectAsync" => "10.0", + "cudaGLMapBufferObject" => "10.0", + "cudaFuncSetSharedMemConfig" => "12.4", + "cudaErrorTextureNotBound" => "3.1", + "cudaErrorTextureFetchFailed" => "3.1", + "cudaErrorSynchronizationError" => "3.1", + "cudaErrorProfilerNotInitialized" => "5.0", + "cudaErrorProfilerAlreadyStopped" => "5.0", + "cudaErrorProfilerAlreadyStarted" => "5.0", + "cudaErrorPriorLaunchFailure" => "3.1", + "cudaErrorNotYetImplemented" => "4.1", + "cudaErrorMixedDeviceExecution" => "3.1", + "cudaErrorMemoryValueTooLarge" => "3.1", + "cudaErrorInvalidHostPointer" => "10.1", + "cudaErrorInvalidDevicePointer" => "10.1", + "cudaErrorApiFailureBase" => "4.1", + "cudaErrorAddressOfConstant" => "3.1", + "cudaDeviceSetSharedMemConfig" => "12.4", + "cudaDeviceGetSharedMemConfig" => "12.4", + "cudaDevAttrMaxTimelineSemaphoreInteropSupported" => "11.5", + "cudaD3D9UnregisterResource" => "10.0", + "cudaD3D9UnmapResources" => "10.0", + "cudaD3D9ResourceSetMapFlags" => "10.0", + "cudaD3D9ResourceGetSurfaceDimensions" => "10.0", + "cudaD3D9ResourceGetMappedSize" => "10.0", + "cudaD3D9ResourceGetMappedPointer" => "10.0", + "cudaD3D9ResourceGetMappedPitch" => "10.0", + "cudaD3D9ResourceGetMappedArray" => "10.0", + "cudaD3D9MapResources" => "10.0", + "cudaD3D11SetDirect3DDevice" => "10.0", + "cudaD3D11GetDirect3DDevice" => "10.0", + "cudaD3D10UnregisterResource" => "10.0", + "cudaD3D10UnmapResources" => "10.0", + "cudaD3D10SetDirect3DDevice" => "10.0", + "cudaD3D10ResourceSetMapFlags" => "10.0", + "cudaD3D10ResourceGetSurfaceDimensions" => "10.0", + "cudaD3D10ResourceGetMappedSize" => "10.0", + "cudaD3D10ResourceGetMappedPointer" => "10.0", + "cudaD3D10ResourceGetMappedPitch" => "10.0", + "cudaD3D10ResourceGetMappedArray" => "10.0", + "cudaD3D10RegisterResource" => "10.0", + "cudaD3D10MapResources" => "10.0", + "cudaD3D10GetDirect3DDevice" => "10.0", + "cuTexRefSetMipmappedArray" => "11.0", + "cuTexRefSetMipmapLevelClamp" => "11.0", + "cuTexRefSetMipmapLevelBias" => "11.0", + "cuTexRefSetMipmapFilterMode" => "11.0", + "cuTexRefSetMaxAnisotropy" => "11.0", + "cuTexRefSetFormat" => "11.0", + "cuTexRefSetFlags" => "11.0", + "cuTexRefSetFilterMode" => "11.0", + "cuTexRefSetBorderColor" => "11.0", + "cuTexRefSetArray" => "11.0", + "cuTexRefSetAddress_v2" => "11.0", + "cuTexRefSetAddressMode" => "11.0", + "cuTexRefSetAddress2D" => "11.0", + "cuTexRefSetAddress" => "11.0", + "cuTexRefGetMipmappedArray" => "11.0", + "cuTexRefGetMipmapLevelClamp" => "11.0", + "cuTexRefGetMipmapLevelBias" => "11.0", + "cuTexRefGetMipmapFilterMode" => "11.0", + "cuTexRefGetMaxAnisotropy" => "11.0", + "cuTexRefGetFormat" => "11.0", + "cuTexRefGetFlags" => "11.0", + "cuTexRefGetFilterMode" => "11.0", + "cuTexRefGetBorderColor" => "11.0", + "cuTexRefGetArray" => "11.0", + "cuTexRefGetAddress_v2" => "11.0", + "cuTexRefGetAddressMode" => "11.0", + "cuTexRefGetAddress" => 
"11.0", + "cuTexRefDestroy" => "11.0", + "cuTexRefCreate" => "11.0", + "cuSurfRefSetArray" => "11.0", + "cuSurfRefGetArray" => "11.0", + "cuParamSetv" => "9.2", + "cuParamSeti" => "9.2", + "cuParamSetf" => "9.2", + "cuParamSetTexRef" => "9.2", + "cuParamSetSize" => "9.2", + "cuModuleGetTexRef" => "12.0", + "cuModuleGetSurfRef" => "12.0", + "cuLaunchGridAsync" => "9.2", + "cuLaunchGrid" => "9.2", + "cuLaunchCooperativeKernelMultiDevice" => "11.3", + "cuLaunch" => "9.2", + "cuGLUnregisterBufferObject" => "9.2", + "cuGLUnmapBufferObjectAsync" => "9.2", + "cuGLUnmapBufferObject" => "9.2", + "cuGLSetBufferObjectMapFlags" => "9.2", + "cuGLRegisterBufferObject" => "9.2", + "cuGLMapBufferObjectAsync" => "9.2", + "cuGLMapBufferObject" => "9.2", + "cuGLInit" => "9.2", + "cuGLCtxCreate" => "9.2", + "cuFuncSetSharedSize" => "9.2", + "cuFuncSetSharedMemConfig" => "", + "cuFuncSetBlockShape" => "9.2", + "cuDeviceGetProperties" => "9.2", + "cuDeviceComputeCapability" => "9.2", + "cuD3D9UnregisterResource" => "9.2", + "cuD3D9UnmapResources" => "9.2", + "cuD3D9ResourceSetMapFlags" => "9.2", + "cuD3D9ResourceGetSurfaceDimensions" => "9.2", + "cuD3D9ResourceGetMappedSize" => "9.2", + "cuD3D9ResourceGetMappedPointer" => "9.2", + "cuD3D9ResourceGetMappedPitch" => "9.2", + "cuD3D9ResourceGetMappedArray" => "9.2", + "cuD3D9RegisterResource" => "9.2", + "cuD3D9MapResources" => "9.2", + "cuD3D11GetDirect3DDevice" => "9.2", + "cuD3D11CtxCreateOnDevice" => "9.2", + "cuD3D11CtxCreate" => "9.2", + "cuD3D10UnregisterResource" => "9.2", + "cuD3D10UnmapResources" => "9.2", + "cuD3D10ResourceSetMapFlags" => "9.2", + "cuD3D10ResourceGetSurfaceDimensions" => "9.2", + "cuD3D10ResourceGetMappedSize" => "9.2", + "cuD3D10ResourceGetMappedPointer" => "9.2", + "cuD3D10ResourceGetMappedPitch" => "9.2", + "cuD3D10ResourceGetMappedArray" => "9.2", + "cuD3D10RegisterResource" => "9.2", + "cuD3D10MapResources" => "9.2", + "cuD3D10GetDirect3DDevice" => "9.2", + "cuD3D10CtxCreateOnDevice" => "9.2", + "cuD3D10CtxCreate" => "9.2", + "cuCtxSetSharedMemConfig" => "", + "cuCtxGetSharedMemConfig" => "", + "csru2csrInfo_t" => "12.2", + "csru2csrInfo" => "12.2", + "csrilu02Info_t" => "12.2", + "csrilu02Info" => "12.2", + "csric02Info_t" => "12.2", + "csric02Info" => "12.2", + "bsrsv2Info_t" => "12.2", + "bsrsv2Info" => "12.2", + "bsrsm2Info_t" => "12.2", + "bsrsm2Info" => "12.2", + "bsrilu02Info_t" => "12.2", + "bsrilu02Info" => "12.2", + "CU_JIT_REFERENCED_VARIABLE_NAMES" => "12.0", + "CU_JIT_REFERENCED_VARIABLE_COUNT" => "12.0", + "CU_JIT_REFERENCED_KERNEL_NAMES" => "12.0", + "CU_JIT_REFERENCED_KERNEL_COUNT" => "12.0", + "CU_JIT_PREC_SQRT" => "12.0", + "CU_JIT_PREC_DIV" => "12.0", + "CU_JIT_OPTIMIZE_UNUSED_DEVICE_VARIABLES" => "12.0", + "CU_JIT_LTO" => "12.0", + "CU_JIT_INPUT_NVVM" => "12.0", + "CU_JIT_FTZ" => "12.0", + "CU_JIT_FMA" => "12.0", + "CU_DEVICE_P2P_ATTRIBUTE_ARRAY_ACCESS_ACCESS_SUPPORTED" => "10.0", + "CU_DEVICE_P2P_ATTRIBUTE_ACCESS_ACCESS_SUPPORTED" => "10.1", + "CU_DEVICE_ATTRIBUTE_VIRTUAL_ADDRESS_MANAGEMENT_SUPPORTED" => "11.2", + "CU_DEVICE_ATTRIBUTE_SHARED_MEMORY_PER_BLOCK" => "5.0", + "CU_DEVICE_ATTRIBUTE_REGISTERS_PER_BLOCK" => "5.0", + "CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_WIDTH" => "5.0", + "CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_NUMSLICES" => "5.0", + "CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_HEIGHT" => "5.0", + "CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LINEAR_WIDTH" => "11.2", + "CU_DEVICE_ATTRIBUTE_GPU_OVERLAP" => "5.0", + "CU_DEVICE_ATTRIBUTE_CAN_TEX2D_GATHER" => "5.0", + "CU_CTX_BLOCKING_SYNC" => "4.0", + 
"CUSPARSE_SOLVE_POLICY_USE_LEVEL" => "12.2", + "CUSPARSE_SOLVE_POLICY_NO_LEVEL" => "12.2", + "CUSPARSE_MV_ALG_DEFAULT" => "11.3", + "CUSPARSE_MM_ALG_DEFAULT" => "11.0", + "CUSPARSE_HYB_PARTITION_USER" => "10.2", + "CUSPARSE_HYB_PARTITION_MAX" => "10.2", + "CUSPARSE_HYB_PARTITION_AUTO" => "10.2", + "CUSPARSE_CSRMV_ALG2" => "11.2", + "CUSPARSE_CSRMV_ALG1" => "11.2", + "CUSPARSE_CSRMM_ALG1" => "11.0", + "CUSPARSE_COOMV_ALG" => "11.2", + "CUSPARSE_COOMM_ALG3" => "11.0", + "CUSPARSE_COOMM_ALG2" => "11.0", + "CUSPARSE_COOMM_ALG1" => "11.0", + "CUSPARSE_COLOR_ALG1" => "12.2", + "CUSPARSE_COLOR_ALG0" => "12.2", + "CUDNN_TYPE_NAN_PROPOGATION" => "9.0.0", + "CUDNN_STATUS_VERSION_MISMATCH" => "9.0.0", + "CUDNN_STATUS_RUNTIME_PREREQUISITE_MISSING" => "9.0.0", + "CUDNN_STATUS_MAPPING_ERROR" => "9.0.0", + "CUDNN_STATUS_INVALID_VALUE" => "9.0.0", + "CUDNN_STATUS_ARCH_MISMATCH" => "9.0.0", + "CUDNN_STATUS_ALLOC_FAILED" => "9.0.0", + "CUDNN_REDUCE_TENSOR_NO_INDICES" => "9.0.0", + "CUDNN_REDUCE_TENSOR_NORM2" => "9.0.0", + "CUDNN_REDUCE_TENSOR_NORM1" => "9.0.0", + "CUDNN_REDUCE_TENSOR_MUL_NO_ZEROS" => "9.0.0", + "CUDNN_REDUCE_TENSOR_MUL" => "9.0.0", + "CUDNN_REDUCE_TENSOR_MIN" => "9.0.0", + "CUDNN_REDUCE_TENSOR_MAX" => "9.0.0", + "CUDNN_REDUCE_TENSOR_FLATTENED_INDICES" => "9.0.0", + "CUDNN_REDUCE_TENSOR_AVG" => "9.0.0", + "CUDNN_REDUCE_TENSOR_AMAX" => "9.0.0", + "CUDNN_REDUCE_TENSOR_ADD" => "9.0.0", + "CUDNN_PROPAGATE_NAN" => "9.0.0", + "CUDNN_POOLING_MAX_DETERMINISTIC" => "9.0.0", + "CUDNN_POOLING_MAX" => "9.0.0", + "CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING" => "9.0.0", + "CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING" => "9.0.0", + "CUDNN_NO_REORDER" => "9.0.0", + "CUDNN_NOT_PROPAGATE_NAN" => "9.0.0", + "CUDNN_NORM_PER_CHANNEL" => "9.0.0", + "CUDNN_NORM_PER_ACTIVATION" => "9.0.0", + "CUDNN_NORM_OPS_NORM_ADD_ACTIVATION" => "9.0.0", + "CUDNN_NORM_OPS_NORM_ACTIVATION" => "9.0.0", + "CUDNN_NORM_OPS_NORM" => "9.0.0", + "CUDNN_NORM_ALGO_STANDARD" => "9.0.0", + "CUDNN_NORM_ALGO_PERSIST" => "9.0.0", + "CUDNN_KNOB_TYPE_WINO_TILE" => "9.0.0", + "CUDNN_KNOB_TYPE_USE_TEX" => "9.0.0", + "CUDNN_KNOB_TYPE_TILE_CGA" => "9.0.0", + "CUDNN_KNOB_TYPE_SPLIT_RS" => "9.0.0", + "CUDNN_KNOB_TYPE_SPLIT_K" => "9.0.0", + "CUDNN_KNOB_TYPE_SPLIT_H" => "9.0.0", + "CUDNN_KNOB_TYPE_SLICED" => "9.0.0", + "CUDNN_KNOB_TYPE_SINGLEBUFFER" => "9.0.0", + "CUDNN_KNOB_TYPE_NUM_C_PER_BLOCK" => "9.0.0", + "CUDNN_KNOB_TYPE_LDGC" => "9.0.0", + "CUDNN_KNOB_TYPE_LDGB" => "9.0.0", + "CUDNN_KNOB_TYPE_LDGA" => "9.0.0", + "CUDNN_KNOB_TYPE_KBLOCK" => "9.0.0", + "CUDNN_KNOB_TYPE_IDX_MODE" => "9.0.0", + "CUDNN_KNOB_TYPE_CTA_SPLIT_K_MODE" => "9.0.0", + "CUDNN_KNOB_TYPE_CHUNK_K" => "9.0.0", + "CUDNN_DEFAULT_REORDER" => "9.0.0", + "CUDNN_DATA_UINT8x4" => "9.0.0", + "CUDNN_DATA_INT8x4" => "9.0.0", + "CUDNN_DATA_INT8x32" => "9.0.0", + "CUDNN_CROSS_CORRELATION" => "9.0.0", + "CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT" => "7.6.5", + "CUDNN_CONVOLUTION_FWD_PREFER_FASTEST" => "7.6.5", + "CUDNN_CONVOLUTION_FWD_NO_WORKSPACE" => "7.6.5", + "CUDNN_CONVOLUTION_BWD_FILTER_SPECIFY_WORKSPACE_LIMIT" => "7.6.5", + "CUDNN_CONVOLUTION_BWD_FILTER_PREFER_FASTEST" => "7.6.5", + "CUDNN_CONVOLUTION_BWD_FILTER_NO_WORKSPACE" => "7.6.5", + "CUDNN_CONVOLUTION_BWD_DATA_SPECIFY_WORKSPACE_LIMIT" => "7.6.5", + "CUDNN_CONVOLUTION_BWD_DATA_PREFER_FASTEST" => "7.6.5", + "CUDNN_CONVOLUTION_BWD_DATA_NO_WORKSPACE" => "7.6.5", + "CUDNN_CONVOLUTION" => "9.0.0", + "CUDNN_BATCHNORM_SPATIAL_PERSISTENT" => "9.0.0", + "CUDNN_BATCHNORM_SPATIAL" => "9.0.0", + "CUDNN_BATCHNORM_PER_ACTIVATION" => "9.0.0", 
+ "CUDNN_BATCHNORM_OPS_BN_ADD_ACTIVATION" => "9.0.0", + "CUDNN_BATCHNORM_OPS_BN_ACTIVATION" => "9.0.0", + "CUDNN_BATCHNORM_OPS_BN" => "9.0.0", + "CUDNN_ATTR_OPERATION_RESAMPLE_FWD_BETA" => "9.0.0", + "CUDNN_ATTR_OPERATION_RESAMPLE_FWD_ALPHA" => "9.0.0", + "CUDNN_ATTR_OPERATION_RESAMPLE_BWD_BETA" => "9.0.0", + "CUDNN_ATTR_OPERATION_RESAMPLE_BWD_ALPHA" => "9.0.0", + "CUDNN_ATTR_OPERATION_MATMUL_IRREGULARLY_STRIDED_BATCH_COUNT" => "9.0.0", + "CUDNN_ACTIVATION_TANH" => "9.0.0", + "CUDNN_ACTIVATION_SWISH" => "9.0.0", + "CUDNN_ACTIVATION_SIGMOID" => "9.0.0", + "CUDNN_ACTIVATION_RELU" => "9.0.0", + "CUDNN_ACTIVATION_IDENTITY" => "9.0.0", + "CUDNN_ACTIVATION_ELU" => "9.0.0", + "CUDNN_ACTIVATION_CLIPPED_RELU" => "9.0.0", + "CUDNN_8BIT_INDICES" => "9.0.0", + "CUDNN_64BIT_INDICES" => "9.0.0", + "CUDNN_32BIT_INDICES" => "9.0.0", + "CUDNN_16BIT_INDICES" => "9.0.0", + "CUDA_ERROR_PROFILER_NOT_INITIALIZED" => "5.0", + "CUDA_ERROR_PROFILER_ALREADY_STOPPED" => "5.0", + "CUDA_ERROR_PROFILER_ALREADY_STARTED" => "5.0", + "CUDA_ERROR_CONTEXT_ALREADY_CURRENT" => "3.2", + "CUDA_ARRAY3D_2DARRAY" => "5.0", + "CUBLAS_TENSOR_OP_MATH" => "11.0" +); + +my %removed_funcs = ( + "texture" => "12.0", + "surfaceReference" => "12.0", + "cusparseZsctr" => "12.0", + "cusparseZhybsv_solve" => "11.0", + "cusparseZhybsv_analysis" => "11.0", + "cusparseZhybmv" => "11.0", + "cusparseZhyb2dense" => "11.0", + "cusparseZhyb2csr" => "11.0", + "cusparseZhyb2csc" => "11.0", + "cusparseZgtsv_nopivot" => "11.0", + "cusparseZgtsvStridedBatch" => "11.0", + "cusparseZgtsv" => "11.0", + "cusparseZgthrz" => "12.0", + "cusparseZgthr" => "12.0", + "cusparseZgemmi" => "12.0", + "cusparseZdoti" => "11.0", + "cusparseZdotci" => "11.0", + "cusparseZdense2hyb" => "11.0", + "cusparseZdense2csr" => "12.0", + "cusparseZdense2csc" => "12.0", + "cusparseZcsrsv_solve" => "11.0", + "cusparseZcsrsv_analysis" => "11.0", + "cusparseZcsrsv2_solve" => "12.0", + "cusparseZcsrsv2_bufferSizeExt" => "12.0", + "cusparseZcsrsv2_bufferSize" => "12.0", + "cusparseZcsrsv2_analysis" => "12.0", + "cusparseZcsrsm_solve" => "11.0", + "cusparseZcsrsm_analysis" => "11.0", + "cusparseZcsrsm2_solve" => "12.0", + "cusparseZcsrsm2_bufferSizeExt" => "12.0", + "cusparseZcsrsm2_analysis" => "12.0", + "cusparseZcsrmv_mp" => "11.0", + "cusparseZcsrmv" => "11.0", + "cusparseZcsrmm2" => "11.0", + "cusparseZcsrmm" => "11.0", + "cusparseZcsrilu0" => "11.0", + "cusparseZcsric0" => "11.0", + "cusparseZcsrgemm2_bufferSizeExt" => "12.0", + "cusparseZcsrgemm2" => "12.0", + "cusparseZcsrgemm" => "11.0", + "cusparseZcsrgeam" => "11.0", + "cusparseZcsr2hyb" => "11.0", + "cusparseZcsr2dense" => "12.0", + "cusparseZcsr2csc" => "11.0", + "cusparseZcsc2hyb" => "11.0", + "cusparseZcsc2dense" => "12.0", + "cusparseZaxpyi" => "12.0", + "cusparseXcsrsv2_zeroPivot" => "12.0", + "cusparseXcsrsm2_zeroPivot" => "12.0", + "cusparseXcsrgemmNnz" => "11.0", + "cusparseXcsrgemm2Nnz" => "12.0", + "cusparseXcsrgeamNnz" => "11.0", + "cusparseSsctr" => "12.0", + "cusparseSroti" => "12.0", + "cusparseSpMatSetStridedBatch" => "12.0", + "cusparseSolveAnalysisInfo_t" => "11.0", + "cusparseSolveAnalysisInfo" => "11.0", + "cusparseSideMode_t" => "11.5", + "cusparseShybsv_solve" => "11.0", + "cusparseShybsv_analysis" => "11.0", + "cusparseShybmv" => "11.0", + "cusparseShyb2dense" => "11.0", + "cusparseShyb2csr" => "11.0", + "cusparseShyb2csc" => "11.0", + "cusparseSgtsv_nopivot" => "11.0", + "cusparseSgtsvStridedBatch" => "11.0", + "cusparseSgtsv" => "11.0", + "cusparseSgthrz" => "12.0", + "cusparseSgthr" => "12.0", + 
"cusparseSgemmi" => "12.0", + "cusparseSdoti" => "11.0", + "cusparseSdense2hyb" => "11.0", + "cusparseSdense2csr" => "12.0", + "cusparseSdense2csc" => "12.0", + "cusparseScsrsv_solve" => "11.0", + "cusparseScsrsv_analysis" => "11.0", + "cusparseScsrsv2_solve" => "12.0", + "cusparseScsrsv2_bufferSizeExt" => "12.0", + "cusparseScsrsv2_bufferSize" => "12.0", + "cusparseScsrsv2_analysis" => "12.0", + "cusparseScsrsm_solve" => "11.0", + "cusparseScsrsm_analysis" => "11.0", + "cusparseScsrsm2_solve" => "12.0", + "cusparseScsrsm2_bufferSizeExt" => "12.0", + "cusparseScsrsm2_analysis" => "12.0", + "cusparseScsrmv_mp" => "11.0", + "cusparseScsrmv" => "11.0", + "cusparseScsrmm2" => "11.0", + "cusparseScsrmm" => "11.0", + "cusparseScsrilu0" => "11.0", + "cusparseScsric0" => "11.0", + "cusparseScsrgemm2_bufferSizeExt" => "12.0", + "cusparseScsrgemm2" => "12.0", + "cusparseScsrgemm" => "11.0", + "cusparseScsrgeam" => "11.0", + "cusparseScsr2hyb" => "11.0", + "cusparseScsr2dense" => "12.0", + "cusparseScsr2csc" => "11.0", + "cusparseScsc2hyb" => "11.0", + "cusparseScsc2dense" => "12.0", + "cusparseSaxpyi" => "12.0", + "cusparseHybPartition_t" => "11.0", + "cusparseHybMat_t" => "11.0", + "cusparseHybMat" => "11.0", + "cusparseGetLevelInfo" => "11.0", + "cusparseDsctr" => "12.0", + "cusparseDroti" => "12.0", + "cusparseDhybsv_solve" => "11.0", + "cusparseDhybsv_analysis" => "11.0", + "cusparseDhybmv" => "11.0", + "cusparseDhyb2dense" => "11.0", + "cusparseDhyb2csr" => "11.0", + "cusparseDhyb2csc" => "11.0", + "cusparseDgtsv_nopivot" => "11.0", + "cusparseDgtsvStridedBatch" => "11.0", + "cusparseDgtsv" => "11.0", + "cusparseDgthrz" => "12.0", + "cusparseDgthr" => "12.0", + "cusparseDgemmi" => "12.0", + "cusparseDestroySolveAnalysisInfo" => "11.0", + "cusparseDestroyHybMat" => "11.0", + "cusparseDestroyCsrsv2Info" => "12.0", + "cusparseDestroyCsrsm2Info" => "12.0", + "cusparseDestroyCsrgemm2Info" => "12.0", + "cusparseDdoti" => "11.0", + "cusparseDdense2hyb" => "11.0", + "cusparseDdense2csr" => "12.0", + "cusparseDdense2csc" => "12.0", + "cusparseDcsrsv_solve" => "11.0", + "cusparseDcsrsv_analysis" => "11.0", + "cusparseDcsrsv2_solve" => "12.0", + "cusparseDcsrsv2_bufferSizeExt" => "12.0", + "cusparseDcsrsv2_bufferSize" => "12.0", + "cusparseDcsrsv2_analysis" => "12.0", + "cusparseDcsrsm_solve" => "11.0", + "cusparseDcsrsm_analysis" => "11.0", + "cusparseDcsrsm2_solve" => "12.0", + "cusparseDcsrsm2_bufferSizeExt" => "12.0", + "cusparseDcsrsm2_analysis" => "12.0", + "cusparseDcsrmv_mp" => "11.0", + "cusparseDcsrmv" => "11.0", + "cusparseDcsrmm2" => "11.0", + "cusparseDcsrmm" => "11.0", + "cusparseDcsrilu0" => "11.0", + "cusparseDcsric0" => "11.0", + "cusparseDcsrgemm2_bufferSizeExt" => "12.0", + "cusparseDcsrgemm2" => "12.0", + "cusparseDcsrgemm" => "11.0", + "cusparseDcsrgeam" => "11.0", + "cusparseDcsr2hyb" => "11.0", + "cusparseDcsr2dense" => "12.0", + "cusparseDcsr2csc" => "11.0", + "cusparseDcsc2hyb" => "11.0", + "cusparseDcsc2dense" => "12.0", + "cusparseDaxpyi" => "12.0", + "cusparseCsrsv_solveEx" => "11.0", + "cusparseCsrsv_analysisEx" => "11.0", + "cusparseCsrmvEx_bufferSize" => "12.0", + "cusparseCsrmvEx" => "12.0", + "cusparseCsrilu0Ex" => "11.0", + "cusparseCsr2cscEx" => "11.0", + "cusparseCsctr" => "12.0", + "cusparseCreateSolveAnalysisInfo" => "11.0", + "cusparseCreateHybMat" => "11.0", + "cusparseCreateCsrsv2Info" => "12.0", + "cusparseCreateCsrsm2Info" => "12.0", + "cusparseCreateCsrgemm2Info" => "12.0", + "cusparseCreateCooAoS" => "12.0", + "cusparseCopyMatDescr" => "12.0", + 
"cusparseCooAoSGet" => "12.0", + "cusparseConstrainedGeMM_bufferSize" => "12.0", + "cusparseConstrainedGeMM" => "12.0", + "cusparseChybsv_solve" => "11.0", + "cusparseChybsv_analysis" => "11.0", + "cusparseChybmv" => "11.0", + "cusparseChyb2dense" => "11.0", + "cusparseChyb2csr" => "11.0", + "cusparseChyb2csc" => "11.0", + "cusparseCgtsv_nopivot" => "11.0", + "cusparseCgtsvStridedBatch" => "11.0", + "cusparseCgtsv" => "11.0", + "cusparseCgthrz" => "12.0", + "cusparseCgthr" => "12.0", + "cusparseCgemmi" => "12.0", + "cusparseCdoti" => "11.0", + "cusparseCdotci" => "11.0", + "cusparseCdense2hyb" => "11.0", + "cusparseCdense2csr" => "12.0", + "cusparseCdense2csc" => "12.0", + "cusparseCcsrsv_solve" => "11.0", + "cusparseCcsrsv_analysis" => "11.0", + "cusparseCcsrsv2_solve" => "12.0", + "cusparseCcsrsv2_bufferSizeExt" => "12.0", + "cusparseCcsrsv2_bufferSize" => "12.0", + "cusparseCcsrsv2_analysis" => "12.0", + "cusparseCcsrsm_solve" => "11.0", + "cusparseCcsrsm_analysis" => "11.0", + "cusparseCcsrsm2_solve" => "12.0", + "cusparseCcsrsm2_bufferSizeExt" => "12.0", + "cusparseCcsrsm2_analysis" => "12.0", + "cusparseCcsrmv_mp" => "11.0", + "cusparseCcsrmv" => "11.0", + "cusparseCcsrmm2" => "11.0", + "cusparseCcsrmm" => "11.0", + "cusparseCcsrilu0" => "11.0", + "cusparseCcsric0" => "11.0", + "cusparseCcsrgemm2_bufferSizeExt" => "12.0", + "cusparseCcsrgemm2" => "12.0", + "cusparseCcsrgemm" => "11.0", + "cusparseCcsrgeam" => "11.0", + "cusparseCcsr2hyb" => "11.0", + "cusparseCcsr2dense" => "12.0", + "cusparseCcsr2csc" => "11.0", + "cusparseCcsc2hyb" => "11.0", + "cusparseCcsc2dense" => "12.0", + "cusparseCaxpyi" => "12.0", + "cusparseAlgMode_t" => "12.0", + "cudnnSetRNNProjectionLayers" => "9.0.0", + "cudnnSetRNNPaddingMode" => "9.0.0", + "cudnnSetRNNMatrixMathType" => "9.0.0", + "cudnnSetRNNDescriptor_v6" => "9.0.0", + "cudnnSetRNNDescriptor_v5" => "8.0.1", + "cudnnSetRNNDescriptor" => "8.0.1", + "cudnnSetRNNBiasMode" => "9.0.0", + "cudnnSetRNNAlgorithmDescriptor" => "9.0.0", + "cudnnSetPersistentRNNPlan" => "9.0.0", + "cudnnSetAlgorithmPerformance" => "9.0.0", + "cudnnSetAlgorithmDescriptor" => "9.0.0", + "cudnnSaveAlgorithm" => "9.0.0", + "cudnnRestoreAlgorithm" => "9.0.0", + "cudnnRNNSetClip" => "9.0.0", + "cudnnRNNGetClip" => "9.0.0", + "cudnnRNNForwardTrainingEx" => "9.0.0", + "cudnnRNNForwardTraining" => "9.0.0", + "cudnnRNNForwardInferenceEx" => "9.0.0", + "cudnnRNNForwardInference" => "9.0.0", + "cudnnRNNBackwardWeightsEx" => "9.0.0", + "cudnnRNNBackwardWeights" => "9.0.0", + "cudnnRNNBackwardDataEx" => "9.0.0", + "cudnnRNNBackwardData" => "9.0.0", + "cudnnOpsTrainVersionCheck" => "9.0.0", + "cudnnGetRNNWorkspaceSize" => "9.0.0", + "cudnnGetRNNTrainingReserveSize" => "9.0.0", + "cudnnGetRNNProjectionLayers" => "9.0.0", + "cudnnGetRNNParamsSize" => "9.0.0", + "cudnnGetRNNPaddingMode" => "9.0.0", + "cudnnGetRNNMatrixMathType" => "9.0.0", + "cudnnGetRNNLinLayerMatrixParams" => "9.0.0", + "cudnnGetRNNLinLayerBiasParams" => "9.0.0", + "cudnnGetRNNForwardTrainingAlgorithmMaxCount" => "9.0.0", + "cudnnGetRNNForwardInferenceAlgorithmMaxCount" => "9.0.0", + "cudnnGetRNNDescriptor_v6" => "9.0.0", + "cudnnGetRNNDescriptor" => "8.0.1", + "cudnnGetRNNBiasMode" => "9.0.0", + "cudnnGetRNNBackwardWeightsAlgorithmMaxCount" => "9.0.0", + "cudnnGetRNNBackwardDataAlgorithmMaxCount" => "9.0.0", + "cudnnGetConvolutionForwardAlgorithm" => "8.0.1", + "cudnnGetConvolutionBackwardFilterAlgorithm" => "8.0.1", + "cudnnGetConvolutionBackwardDataAlgorithm" => "8.0.1", + "cudnnGetAlgorithmSpaceSize" => "9.0.0", + 
"cudnnGetAlgorithmPerformance" => "9.0.0", + "cudnnGetAlgorithmDescriptor" => "9.0.0", + "cudnnFindRNNForwardTrainingAlgorithmEx" => "9.0.0", + "cudnnFindRNNForwardInferenceAlgorithmEx" => "9.0.0", + "cudnnFindRNNBackwardWeightsAlgorithmEx" => "9.0.0", + "cudnnFindRNNBackwardDataAlgorithmEx" => "9.0.0", + "cudnnDestroyPersistentRNNPlan" => "9.0.0", + "cudnnDestroyAlgorithmPerformance" => "9.0.0", + "cudnnDestroyAlgorithmDescriptor" => "9.0.0", + "cudnnCreatePersistentRNNPlan" => "9.0.0", + "cudnnCreateAlgorithmPerformance" => "9.0.0", + "cudnnCreateAlgorithmDescriptor" => "9.0.0", + "cudnnCopyAlgorithmDescriptor" => "9.0.0", + "cudnnConvolutionFwdPreference_t" => "8.0.1", + "cudnnConvolutionBwdFilterPreference_t" => "8.0.1", + "cudnnConvolutionBwdDataPreference_t" => "8.0.1", + "cudnnAttnQueryMap_t" => "9.0.0", + "cudnnAlgorithm_t" => "9.0.0", + "cudnnAlgorithmUnionStruct" => "9.0.0", + "cudnnAlgorithmStruct" => "9.0.0", + "cudnnAlgorithmPerformance_t" => "9.0.0", + "cudnnAlgorithmPerformanceStruct" => "9.0.0", + "cudnnAlgorithmDescriptor_t" => "9.0.0", + "cudnnAdvTrainVersionCheck" => "9.0.0", + "cudnnAdvInferVersionCheck" => "9.0.0", + "cudaUnbindTexture" => "12.0", + "cudaSetupArgument" => "10.1", + "cudaProfilerInitialize" => "12.0", + "cudaOutputMode_t" => "12.0", + "cudaOutputMode" => "12.0", + "cudaLaunch" => "10.1", + "cudaKeyValuePair" => "12.0", + "cudaGetTextureReference" => "12.0", + "cudaGetTextureObjectTextureDesc_v2" => "12.0", + "cudaGetTextureAlignmentOffset" => "12.0", + "cudaGetSurfaceReference" => "12.0", + "cudaDevicePropDontCare" => "12.0", + "cudaCreateTextureObject_v2" => "12.0", + "cudaConfigureCall" => "10.1", + "cudaCSV" => "12.0", + "cudaBindTextureToMipmappedArray" => "12.0", + "cudaBindTextureToArray" => "12.0", + "cudaBindTexture2D" => "12.0", + "cudaBindTexture" => "12.0", + "cudaBindSurfaceToArray" => "12.0", + "csrsv2Info_t" => "12.0", + "csrsv2Info" => "12.0", + "csrsm2Info_t" => "12.0", + "csrsm2Info" => "12.0", + "csrgemm2Info_t" => "12.0", + "csrgemm2Info" => "12.0", + "CUstreamAttrID_enum" => "11.8", + "CUkernelNodeAttrValue_union" => "11.8", + "CUkernelNodeAttrID_enum" => "11.8", + "CU_TARGET_COMPUTE_73" => "10.0", + "CU_TARGET_COMPUTE_21" => "12.0", + "CU_TARGET_COMPUTE_20" => "12.0", + "CU_TARGET_COMPUTE_13" => "9.0", + "CU_TARGET_COMPUTE_12" => "9.0", + "CU_TARGET_COMPUTE_11" => "9.0", + "CU_TARGET_COMPUTE_10" => "9.0", + "CU_GRAPH_NODE_TYPE_COUNT" => "11.0", + "CU_DEVICE_P2P_ATTRIBUTE_ARRAY_ACCESS_ACCESS_SUPPORTED" => "10.1", + "CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_WAIT_VALUE_NOR_V2" => "12.0", + "CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_MEM_OPS" => "12.0", + "CU_DEVICE_ATTRIBUTE_CAN_USE_64_BIT_STREAM_MEM_OPS_V2" => "12.0", + "CU_COMPUTEMODE_EXCLUSIVE" => "8.0", + "CUSPARSE_SPMMA_PREPROCESS" => "11.2", + "CUSPARSE_SPMMA_ALG4" => "11.2", + "CUSPARSE_SPMMA_ALG3" => "11.2", + "CUSPARSE_SPMMA_ALG2" => "11.2", + "CUSPARSE_SPMMA_ALG1" => "11.2", + "CUSPARSE_SIDE_RIGHT" => "11.5", + "CUSPARSE_SIDE_LEFT" => "11.5", + "CUSPARSE_MV_ALG_DEFAULT" => "12.0", + "CUSPARSE_MM_ALG_DEFAULT" => "12.0", + "CUSPARSE_HYB_PARTITION_USER" => "11.0", + "CUSPARSE_HYB_PARTITION_MAX" => "11.0", + "CUSPARSE_HYB_PARTITION_AUTO" => "11.0", + "CUSPARSE_FORMAT_COO_AOS" => "12.0", + "CUSPARSE_CSRMV_ALG2" => "12.0", + "CUSPARSE_CSRMV_ALG1" => "12.0", + "CUSPARSE_CSRMM_ALG1" => "12.0", + "CUSPARSE_COOMV_ALG" => "12.0", + "CUSPARSE_COOMM_ALG3" => "12.0", + "CUSPARSE_COOMM_ALG2" => "12.0", + "CUSPARSE_COOMM_ALG1" => "12.0", + "CUSPARSE_ALG_NAIVE" => "11.0", + "CUSPARSE_ALG_MERGE_PATH" => 
"12.0", + "CUSPARSE_ALG1" => "11.0", + "CUSPARSE_ALG0" => "11.0", + "CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT" => "8.0.1", + "CUDNN_CONVOLUTION_FWD_PREFER_FASTEST" => "8.0.1", + "CUDNN_CONVOLUTION_FWD_NO_WORKSPACE" => "8.0.1", + "CUDNN_CONVOLUTION_BWD_FILTER_SPECIFY_WORKSPACE_LIMIT" => "8.0.1", + "CUDNN_CONVOLUTION_BWD_FILTER_PREFER_FASTEST" => "8.0.1", + "CUDNN_CONVOLUTION_BWD_FILTER_NO_WORKSPACE" => "8.0.1", + "CUDNN_CONVOLUTION_BWD_DATA_SPECIFY_WORKSPACE_LIMIT" => "8.0.1", + "CUDNN_CONVOLUTION_BWD_DATA_PREFER_FASTEST" => "8.0.1", + "CUDNN_CONVOLUTION_BWD_DATA_NO_WORKSPACE" => "8.0.1", + "CUDA_MEM_ALLOC_NODE_PARAMS_st" => "12.2", + "CUDA_BATCH_MEM_OP_NODE_PARAMS_st" => "12.2" +); + +my %experimental_funcs = ( + "cusolverDnXgetrs" => "6.2.0", + "cusolverDnXgetrf_bufferSize" => "6.2.0", + "cusolverDnXgetrf" => "6.2.0", + "cusolverDnSetAdvOptions" => "6.2.0", + "cusolverDnParams_t" => "6.2.0", + "cusolverDnFunction_t" => "6.2.0", + "cusolverDnDestroyParams" => "6.2.0", + "cusolverDnCreateParams" => "6.2.0", + "cusolverAlgMode_t" => "6.2.0", + "curandStateSobol64_t" => "6.2.0", + "curandStateSobol64" => "6.2.0", + "curandStateScrambledSobol64_t" => "6.2.0", + "curandStateScrambledSobol64" => "6.2.0", + "curandStateScrambledSobol32_t" => "6.2.0", + "curandStateScrambledSobol32" => "6.2.0", + "curandSetGeneratorOrdering" => "6.2.0", + "curandOrdering_t" => "6.2.0", + "curandOrdering" => "6.2.0", + "cudaStreamLegacy" => "6.2.0", + "cudaStreamBeginCaptureToGraph" => "6.2.0", + "cudaSetValidDevices" => "6.2.0", + "cudaMemcpy2DArrayToArray" => "6.2.0", + "cudaLaunchAttributeValue" => "6.2.0", + "cudaLaunchAttributePriority" => "6.2.0", + "cudaLaunchAttributeID" => "6.2.0", + "cudaLaunchAttributeCooperative" => "6.2.0", + "cudaLaunchAttributeAccessPolicyWindow" => "6.2.0", + "cudaKernelNodeAttributePriority" => "6.2.0", + "cudaGraphKernelNodePortProgrammatic" => "6.2.0", + "cudaGraphKernelNodePortLaunchCompletion" => "6.2.0", + "cudaGraphKernelNodePortDefault" => "6.2.0", + "cudaGraphInstantiateWithParams" => "6.2.0", + "cudaGraphEdgeData_st" => "6.2.0", + "cudaGraphEdgeData" => "6.2.0", + "cudaGraphDependencyType_enum" => "6.2.0", + "cudaGraphDependencyTypeProgrammatic" => "6.2.0", + "cudaGraphDependencyTypeDefault" => "6.2.0", + "cudaGraphDependencyType" => "6.2.0", + "cudaGraphAddNode" => "6.2.0", + "cudaGetFuncBySymbol" => "6.2.0", + "cudaGetDriverEntryPoint" => "6.2.0", + "cudaDriverEntryPointVersionNotSufficent" => "6.2.0", + "cudaDriverEntryPointSymbolNotFound" => "6.2.0", + "cudaDriverEntryPointSuccess" => "6.2.0", + "cudaDriverEntryPointQueryResult" => "6.2.0", + "cublasZtrsv_v2_64" => "6.2.0", + "cublasZtrsv_64" => "6.2.0", + "cublasZtrmv_v2_64" => "6.2.0", + "cublasZtrmv_64" => "6.2.0", + "cublasZtpsv_v2_64" => "6.2.0", + "cublasZtpsv_64" => "6.2.0", + "cublasZtpmv_v2_64" => "6.2.0", + "cublasZtpmv_64" => "6.2.0", + "cublasZtbsv_v2_64" => "6.2.0", + "cublasZtbsv_64" => "6.2.0", + "cublasZtbmv_v2_64" => "6.2.0", + "cublasZtbmv_64" => "6.2.0", + "cublasZsyr_v2_64" => "6.2.0", + "cublasZsyr_64" => "6.2.0", + "cublasZsyr2_v2_64" => "6.2.0", + "cublasZsyr2_64" => "6.2.0", + "cublasZsymv_v2_64" => "6.2.0", + "cublasZsymv_64" => "6.2.0", + "cublasZhpr_v2_64" => "6.2.0", + "cublasZhpr_64" => "6.2.0", + "cublasZhpr2_v2_64" => "6.2.0", + "cublasZhpr2_64" => "6.2.0", + "cublasZhpmv_v2_64" => "6.2.0", + "cublasZhpmv_64" => "6.2.0", + "cublasZher_v2_64" => "6.2.0", + "cublasZher_64" => "6.2.0", + "cublasZher2_v2_64" => "6.2.0", + "cublasZher2_64" => "6.2.0", + "cublasZhemv_v2_64" => "6.2.0", + 
"cublasZhemv_64" => "6.2.0", + "cublasZhbmv_v2_64" => "6.2.0", + "cublasZhbmv_64" => "6.2.0", + "cublasZgeru_v2_64" => "6.2.0", + "cublasZgeru_64" => "6.2.0", + "cublasZgerc_v2_64" => "6.2.0", + "cublasZgerc_64" => "6.2.0", + "cublasZgemv_v2_64" => "6.2.0", + "cublasZgemv_64" => "6.2.0", + "cublasZgemvStridedBatched_64" => "6.2.0", + "cublasZgemvBatched_64" => "6.2.0", + "cublasZgbmv_v2_64" => "6.2.0", + "cublasZgbmv_64" => "6.2.0", + "cublasStrsv_v2_64" => "6.2.0", + "cublasStrsv_64" => "6.2.0", + "cublasStrmv_v2_64" => "6.2.0", + "cublasStrmv_64" => "6.2.0", + "cublasStpsv_v2_64" => "6.2.0", + "cublasStpsv_64" => "6.2.0", + "cublasStpmv_v2_64" => "6.2.0", + "cublasStpmv_64" => "6.2.0", + "cublasStbsv_v2_64" => "6.2.0", + "cublasStbsv_64" => "6.2.0", + "cublasStbmv_v2_64" => "6.2.0", + "cublasStbmv_64" => "6.2.0", + "cublasSsyr_v2_64" => "6.2.0", + "cublasSsyr_64" => "6.2.0", + "cublasSsyr2_v2_64" => "6.2.0", + "cublasSsyr2_64" => "6.2.0", + "cublasSsymv_v2_64" => "6.2.0", + "cublasSsymv_64" => "6.2.0", + "cublasSspr_v2_64" => "6.2.0", + "cublasSspr_64" => "6.2.0", + "cublasSspr2_v2_64" => "6.2.0", + "cublasSspr2_64" => "6.2.0", + "cublasSspmv_v2_64" => "6.2.0", + "cublasSspmv_64" => "6.2.0", + "cublasSsbmv_v2_64" => "6.2.0", + "cublasSsbmv_64" => "6.2.0", + "cublasSger_v2_64" => "6.2.0", + "cublasSger_64" => "6.2.0", + "cublasSgemv_v2_64" => "6.2.0", + "cublasSgemv_64" => "6.2.0", + "cublasSgemvStridedBatched_64" => "6.2.0", + "cublasSgemvBatched_64" => "6.2.0", + "cublasSgbmv_v2_64" => "6.2.0", + "cublasSgbmv_64" => "6.2.0", + "cublasScalEx_64" => "6.2.0", + "cublasRotEx_64" => "6.2.0", + "cublasNrm2Ex_64" => "6.2.0", + "cublasDtrsv_v2_64" => "6.2.0", + "cublasDtrsv_64" => "6.2.0", + "cublasDtrmv_v2_64" => "6.2.0", + "cublasDtrmv_64" => "6.2.0", + "cublasDtpsv_v2_64" => "6.2.0", + "cublasDtpsv_64" => "6.2.0", + "cublasDtpmv_v2_64" => "6.2.0", + "cublasDtpmv_64" => "6.2.0", + "cublasDtbsv_v2_64" => "6.2.0", + "cublasDtbsv_64" => "6.2.0", + "cublasDtbmv_v2_64" => "6.2.0", + "cublasDtbmv_64" => "6.2.0", + "cublasDsyr_v2_64" => "6.2.0", + "cublasDsyr_64" => "6.2.0", + "cublasDsyr2_v2_64" => "6.2.0", + "cublasDsyr2_64" => "6.2.0", + "cublasDsymv_v2_64" => "6.2.0", + "cublasDsymv_64" => "6.2.0", + "cublasDspr_v2_64" => "6.2.0", + "cublasDspr_64" => "6.2.0", + "cublasDspr2_v2_64" => "6.2.0", + "cublasDspr2_64" => "6.2.0", + "cublasDspmv_v2_64" => "6.2.0", + "cublasDspmv_64" => "6.2.0", + "cublasDsbmv_v2_64" => "6.2.0", + "cublasDsbmv_64" => "6.2.0", + "cublasDotcEx_64" => "6.2.0", + "cublasDotEx_64" => "6.2.0", + "cublasDger_v2_64" => "6.2.0", + "cublasDger_64" => "6.2.0", + "cublasDgemv_v2_64" => "6.2.0", + "cublasDgemv_64" => "6.2.0", + "cublasDgemvStridedBatched_64" => "6.2.0", + "cublasDgemvBatched_64" => "6.2.0", + "cublasDgbmv_v2_64" => "6.2.0", + "cublasDgbmv_64" => "6.2.0", + "cublasCtrsv_v2_64" => "6.2.0", + "cublasCtrsv_64" => "6.2.0", + "cublasCtrmv_v2_64" => "6.2.0", + "cublasCtrmv_64" => "6.2.0", + "cublasCtpsv_v2_64" => "6.2.0", + "cublasCtpsv_64" => "6.2.0", + "cublasCtpmv_v2_64" => "6.2.0", + "cublasCtpmv_64" => "6.2.0", + "cublasCtbsv_v2_64" => "6.2.0", + "cublasCtbsv_64" => "6.2.0", + "cublasCtbmv_v2_64" => "6.2.0", + "cublasCtbmv_64" => "6.2.0", + "cublasCsyr_v2_64" => "6.2.0", + "cublasCsyr_64" => "6.2.0", + "cublasCsyr2_v2_64" => "6.2.0", + "cublasCsyr2_64" => "6.2.0", + "cublasCsymv_v2_64" => "6.2.0", + "cublasCsymv_64" => "6.2.0", + "cublasChpr_v2_64" => "6.2.0", + "cublasChpr_64" => "6.2.0", + "cublasChpr2_v2_64" => "6.2.0", + "cublasChpr2_64" => "6.2.0", + 
"cublasChpmv_v2_64" => "6.2.0", + "cublasChpmv_64" => "6.2.0", + "cublasCher_v2_64" => "6.2.0", + "cublasCher_64" => "6.2.0", + "cublasCher2_v2_64" => "6.2.0", + "cublasCher2_64" => "6.2.0", + "cublasChemv_v2_64" => "6.2.0", + "cublasChemv_64" => "6.2.0", + "cublasChbmv_v2_64" => "6.2.0", + "cublasChbmv_64" => "6.2.0", + "cublasCgeru_v2_64" => "6.2.0", + "cublasCgeru_64" => "6.2.0", + "cublasCgerc_v2_64" => "6.2.0", + "cublasCgerc_64" => "6.2.0", + "cublasCgemv_v2_64" => "6.2.0", + "cublasCgemv_64" => "6.2.0", + "cublasCgemvStridedBatched_64" => "6.2.0", + "cublasCgemvBatched_64" => "6.2.0", + "cublasCgbmv_v2_64" => "6.2.0", + "cublasCgbmv_64" => "6.2.0", + "cublasAxpyEx_64" => "6.2.0", + "cuStreamBeginCaptureToGraph" => "6.2.0", + "cuMemcpyHtoAAsync_v2" => "6.2.0", + "cuMemcpyHtoAAsync" => "6.2.0", + "cuMemcpyDtoA_v2" => "6.2.0", + "cuMemcpyDtoA" => "6.2.0", + "cuMemcpyAtoHAsync_v2" => "6.2.0", + "cuMemcpyAtoHAsync" => "6.2.0", + "cuMemcpyAtoD_v2" => "6.2.0", + "cuMemcpyAtoD" => "6.2.0", + "cuMemcpyAtoA_v2" => "6.2.0", + "cuMemcpyAtoA" => "6.2.0", + "cuGraphInstantiateWithParams" => "6.2.0", + "cuGraphAddNode" => "6.2.0", + "cuGetProcAddress" => "6.2.0", + "CUlaunchAttributeValue_union" => "6.2.0", + "CUlaunchAttributeValue" => "6.2.0", + "CUlaunchAttributeID_enum" => "6.2.0", + "CUlaunchAttributeID" => "6.2.0", + "CUgraphEdgeData_st" => "6.2.0", + "CUgraphEdgeData" => "6.2.0", + "CUgraphDependencyType_enum" => "6.2.0", + "CUgraphDependencyType" => "6.2.0", + "CUdriverProcAddressQueryResult_enum" => "6.2.0", + "CUdriverProcAddressQueryResult" => "6.2.0", + "CU_STREAM_LEGACY" => "6.2.0", + "CU_LAUNCH_ATTRIBUTE_PRIORITY" => "6.2.0", + "CU_LAUNCH_ATTRIBUTE_COOPERATIVE" => "6.2.0", + "CU_LAUNCH_ATTRIBUTE_ACCESS_POLICY_WINDOW" => "6.2.0", + "CU_KERNEL_NODE_ATTRIBUTE_PRIORITY" => "6.2.0", + "CU_GRAPH_KERNEL_NODE_PORT_PROGRAMMATIC" => "6.2.0", + "CU_GRAPH_KERNEL_NODE_PORT_LAUNCH_ORDER" => "6.2.0", + "CU_GRAPH_KERNEL_NODE_PORT_DEFAULT" => "6.2.0", + "CU_GRAPH_DEPENDENCY_TYPE_PROGRAMMATIC" => "6.2.0", + "CU_GRAPH_DEPENDENCY_TYPE_DEFAULT" => "6.2.0", + "CU_GET_PROC_ADDRESS_VERSION_NOT_SUFFICIENT" => "6.2.0", + "CU_GET_PROC_ADDRESS_SYMBOL_NOT_FOUND" => "6.2.0", + "CU_GET_PROC_ADDRESS_SUCCESS" => "6.2.0", + "CUSOLVER_ALG_1" => "6.2.0", + "CUSOLVER_ALG_0" => "6.2.0", + "CUSOLVERDN_GETRF" => "6.2.0", + "CURAND_ORDERING_QUASI_DEFAULT" => "6.2.0", + "CURAND_ORDERING_PSEUDO_SEEDED" => "6.2.0", + "CURAND_ORDERING_PSEUDO_LEGACY" => "6.2.0", + "CURAND_ORDERING_PSEUDO_DYNAMIC" => "6.2.0", + "CURAND_ORDERING_PSEUDO_DEFAULT" => "6.2.0", + "CURAND_ORDERING_PSEUDO_BEST" => "6.2.0", + "CUBLASLT_MATMUL_DESC_AMAX_D_POINTER" => "6.2.0" +); + +$print_stats = 1 if $examine; +$no_output = 1 if $examine; + +# Whitelist of cuda[A-Z] identifiers, which are commonly used in CUDA sources but don't map to any CUDA API: +@whitelist = ( + "cudaCloverField" + , "cudaColorSpinorField" + , "cudaCpsGauge" + , "cudaDevice" + , "cudaDeviceId" + , "cudaDevice_t" + , "cudaDevices" + , "cudaDimBlock" + , "cudaDimGrid" + , "cudaEigValueSet" + , "cudaEigVecSet" + , "cudaFatLink" + , "cudaForce" + , "cudaForce_ex" + , "cudaGauge" + , "cudaGaugeField" + , "cudaGaugeTemp" + , "cudaGauge_ex" + , "cudaGradInput" + , "cudaGradOutput" + , "cudaGridDim" + , "cudaIDs" + , "cudaInForce" + , "cudaInGauge" + , "cudaInGaugeEx" + , "cudaInLink" + , "cudaInLinkEx" + , "cudaIndices" + , "cudaInput" + , "cudaLongLink" + , "cudaLongLinkOprod" + , "cudaLongLinkOprod_ex" + , "cudaMemcpys" + , "cudaMom" + , "cudaOprod" + , "cudaOprod_ex" + , "cudaOutForce" + 
, "cudaOutGauge" + , "cudaOutput" + , "cudaParam" + , "cudaQdpGauge" + , "cudaQuark" + , "cudaResult" + , "cudaRitzVectors" + , "cudaSiteLink" + , "cudaSpinor" + , "cudaSpinorOut" + , "cudaStaple" + , "cudaULink" + , "cudaUnitarizedLink" +); + +push(@whitelist, split(',', $whitelist)); +push(@exclude_dirlist, split(',', $exclude_dirs)); +push(@exclude_filelist, split(',', $exclude_files)); + +# Turn exclude dirlist and exclude_filelist into hash maps +%exclude_dirhash = map { $_ => 1 } @exclude_dirlist; +%exclude_filehash = map { $_ => 1 } @exclude_filelist; + +@statNames = ("error", "init", "version", "device", "context", "module", "library", "memory", "virtual_memory", "ordered_memory", "multicast", "unified", "stream", "event", "external_resource", "stream_memory", "execution", "graph", "occupancy", "texture", "surface", "tensor", "peer", "graphics", "driver_entry_point", "cpp", "coredump", "driver_interact", "profiler", "openGL", "D3D9", "D3D10", "D3D11", "VDPAU", "EGL", "thread", "complex", "library", "device_library", "device_function", "device_type", "include", "include_cuda_main_header", "include_cuda_main_header_v2", "type", "literal", "numeric_literal", "define", "extern_shared", "kernel_launch"); + +sub totalStats { + my %count = %{shift()}; + my $total = 0; + foreach $key (keys %count) { + $total += $count{$key}; + } + return $total; +} + +sub printStats { + my %counts = %{shift()}; + my $warnings = shift(); + my $loc = shift(); + my $fileName = shift(); + my $global = shift(); + my $total = totalStats(\%counts); + printf STDERR "\n[HIPIFY] info: file '$fileName' statistics:\n"; + printf STDERR " CONVERTED refs count: $total\n"; + printf STDERR " TOTAL lines of code: $loc\n"; + printf STDERR " WARNINGS: $warnings\n"; + printf STDERR "[HIPIFY] info: CONVERTED refs by names:\n"; + if ($global) { + foreach my $key (sort keys %tagsToConvertedTagsTotal) { + printf STDERR " %s => %s: %d\n", $key, $tagsToConvertedTagsTotal{$key}, $convertedTagsTotal{$tagsToConvertedTagsTotal{$key}}; + } + } else { + foreach my $key (sort keys %tagsToConvertedTags) { + printf STDERR " %s => %s: %d\n", $key, $tagsToConvertedTags{$key}, $convertedTags{$tagsToConvertedTags{$key}}; + } + } +} + +sub addStats { + my $dest_ref = shift(); + my %adder = %{shift()}; + foreach $key (keys %adder) { + $dest_ref->{$key} += $adder{$key}; + } +} + +sub clearStats { + my $dest_ref = shift(); + my @statNames = @{shift()}; + foreach $stat(@statNames) { + $dest_ref->{$stat} = 0; + } +} + +sub subst { + my $a = shift(); + my $b = shift(); + my $t = shift(); + my $i = ""; + if ($t eq "include" or $t eq "include_cuda_main_header") { + $i = "(?])"; + } + if (my $c += s/$i\b$a\b/$b/g) { + $ft{$t} += $c; + $tags{$a} +=$c; + $tagsTotal{$a} +=$c; + $convertedTags{$b} +=$c; + $convertedTagsTotal{$b} +=$c; + $tagsToConvertedTags{$a} = $b; + $tagsToConvertedTagsTotal{$a} = $b; + } +} + +sub experimentalSubstitutions { + subst("cudaSetValidDevices", "hipSetValidDevices", "device"); + subst("cuMemcpyAtoA", "hipMemcpyAtoA", "memory"); + subst("cuMemcpyAtoA_v2", "hipMemcpyAtoA", "memory"); + subst("cuMemcpyAtoD", "hipMemcpyAtoD", "memory"); + subst("cuMemcpyAtoD_v2", "hipMemcpyAtoD", "memory"); + subst("cuMemcpyAtoHAsync", "hipMemcpyAtoHAsync", "memory"); + subst("cuMemcpyAtoHAsync_v2", "hipMemcpyAtoHAsync", "memory"); + subst("cuMemcpyDtoA", "hipMemcpyDtoA", "memory"); + subst("cuMemcpyDtoA_v2", "hipMemcpyDtoA", "memory"); + subst("cuMemcpyHtoAAsync", "hipMemcpyHtoAAsync", "memory"); + subst("cuMemcpyHtoAAsync_v2", 
"hipMemcpyHtoAAsync", "memory"); + subst("cudaMemcpy2DArrayToArray", "hipMemcpy2DArrayToArray", "memory"); + subst("cuStreamBeginCaptureToGraph", "hipStreamBeginCaptureToGraph", "stream"); + subst("cudaStreamBeginCaptureToGraph", "hipStreamBeginCaptureToGraph", "stream"); + subst("cuGraphAddNode", "hipGraphAddNode", "graph"); + subst("cuGraphInstantiateWithParams", "hipGraphInstantiateWithParams", "graph"); + subst("cudaGraphAddNode", "hipGraphAddNode", "graph"); + subst("cudaGraphInstantiateWithParams", "hipGraphInstantiateWithParams", "graph"); + subst("cuGetProcAddress", "hipGetProcAddress", "driver_entry_point"); + subst("cudaGetDriverEntryPoint", "hipGetProcAddress", "driver_entry_point"); + subst("cudaGetFuncBySymbol", "hipGetFuncBySymbol", "driver_interact"); + subst("cublasAxpyEx_64", "hipblasAxpyEx_v2_64", "library"); + subst("cublasCgbmv_64", "hipblasCgbmv_v2_64", "library"); + subst("cublasCgbmv_v2_64", "hipblasCgbmv_v2_64", "library"); + subst("cublasCgemvBatched_64", "hipblasCgemvBatched_v2_64", "library"); + subst("cublasCgemvStridedBatched_64", "hipblasCgemvStridedBatched_v2_64", "library"); + subst("cublasCgemv_64", "hipblasCgemv_v2_64", "library"); + subst("cublasCgemv_v2_64", "hipblasCgemv_v2_64", "library"); + subst("cublasCgerc_64", "hipblasCgerc_v2_64", "library"); + subst("cublasCgerc_v2_64", "hipblasCgerc_v2_64", "library"); + subst("cublasCgeru_64", "hipblasCgeru_v2_64", "library"); + subst("cublasCgeru_v2_64", "hipblasCgeru_v2_64", "library"); + subst("cublasChbmv_64", "hipblasChbmv_v2_64", "library"); + subst("cublasChbmv_v2_64", "hipblasChbmv_v2_64", "library"); + subst("cublasChemv_64", "hipblasChemv_v2_64", "library"); + subst("cublasChemv_v2_64", "hipblasChemv_v2_64", "library"); + subst("cublasCher2_64", "hipblasCher2_v2_64", "library"); + subst("cublasCher2_v2_64", "hipblasCher2_v2_64", "library"); + subst("cublasCher_64", "hipblasCher_v2_64", "library"); + subst("cublasCher_v2_64", "hipblasCher_v2_64", "library"); + subst("cublasChpmv_64", "hipblasChpmv_v2_64", "library"); + subst("cublasChpmv_v2_64", "hipblasChpmv_v2_64", "library"); + subst("cublasChpr2_64", "hipblasChpr2_v2_64", "library"); + subst("cublasChpr2_v2_64", "hipblasChpr2_v2_64", "library"); + subst("cublasChpr_64", "hipblasChpr_v2_64", "library"); + subst("cublasChpr_v2_64", "hipblasChpr_v2_64", "library"); + subst("cublasCsymv_64", "hipblasCsymv_v2_64", "library"); + subst("cublasCsymv_v2_64", "hipblasCsymv_v2_64", "library"); + subst("cublasCsyr2_64", "hipblasCsyr2_v2_64", "library"); + subst("cublasCsyr2_v2_64", "hipblasCsyr2_v2_64", "library"); + subst("cublasCsyr_64", "hipblasCsyr_v2_64", "library"); + subst("cublasCsyr_v2_64", "hipblasCsyr_v2_64", "library"); + subst("cublasCtbmv_64", "hipblasCtbmv_v2_64", "library"); + subst("cublasCtbmv_v2_64", "hipblasCtbmv_v2_64", "library"); + subst("cublasCtbsv_64", "hipblasCtbsv_v2_64", "library"); + subst("cublasCtbsv_v2_64", "hipblasCtbsv_v2_64", "library"); + subst("cublasCtpmv_64", "hipblasCtpmv_v2_64", "library"); + subst("cublasCtpmv_v2_64", "hipblasCtpmv_v2_64", "library"); + subst("cublasCtpsv_64", "hipblasCtpsv_v2_64", "library"); + subst("cublasCtpsv_v2_64", "hipblasCtpsv_v2_64", "library"); + subst("cublasCtrmv_64", "hipblasCtrmv_v2_64", "library"); + subst("cublasCtrmv_v2_64", "hipblasCtrmv_v2_64", "library"); + subst("cublasCtrsv_64", "hipblasCtrsv_v2_64", "library"); + subst("cublasCtrsv_v2_64", "hipblasCtrsv_v2_64", "library"); + subst("cublasDgbmv_64", "hipblasDgbmv_64", "library"); + subst("cublasDgbmv_v2_64", "hipblasDgbmv_64", 
"library"); + subst("cublasDgemvBatched_64", "hipblasDgemvBatched_64", "library"); + subst("cublasDgemvStridedBatched_64", "hipblasDgemvStridedBatched_64", "library"); + subst("cublasDgemv_64", "hipblasDgemv_64", "library"); + subst("cublasDgemv_v2_64", "hipblasDgemv_64", "library"); + subst("cublasDger_64", "hipblasDger_64", "library"); + subst("cublasDger_v2_64", "hipblasDger_64", "library"); + subst("cublasDotEx_64", "hipblasDotEx_v2_64", "library"); + subst("cublasDotcEx_64", "hipblasDotcEx_v2_64", "library"); + subst("cublasDsbmv_64", "hipblasDsbmv_64", "library"); + subst("cublasDsbmv_v2_64", "hipblasDsbmv_64", "library"); + subst("cublasDspmv_64", "hipblasDspmv_64", "library"); + subst("cublasDspmv_v2_64", "hipblasDspmv_64", "library"); + subst("cublasDspr2_64", "hipblasDspr2_64", "library"); + subst("cublasDspr2_v2_64", "hipblasDspr2_64", "library"); + subst("cublasDspr_64", "hipblasDspr_64", "library"); + subst("cublasDspr_v2_64", "hipblasDspr_64", "library"); + subst("cublasDsymv_64", "hipblasDsymv_64", "library"); + subst("cublasDsymv_v2_64", "hipblasDsymv_64", "library"); + subst("cublasDsyr2_64", "hipblasDsyr2_64", "library"); + subst("cublasDsyr2_v2_64", "hipblasDsyr2_64", "library"); + subst("cublasDsyr_64", "hipblasDsyr_64", "library"); + subst("cublasDsyr_v2_64", "hipblasDsyr_64", "library"); + subst("cublasDtbmv_64", "hipblasDtbmv_64", "library"); + subst("cublasDtbmv_v2_64", "hipblasDtbmv_64", "library"); + subst("cublasDtbsv_64", "hipblasDtbsv_64", "library"); + subst("cublasDtbsv_v2_64", "hipblasDtbsv_64", "library"); + subst("cublasDtpmv_64", "hipblasDtpmv_64", "library"); + subst("cublasDtpmv_v2_64", "hipblasDtpmv_64", "library"); + subst("cublasDtpsv_64", "hipblasDtpsv_64", "library"); + subst("cublasDtpsv_v2_64", "hipblasDtpsv_64", "library"); + subst("cublasDtrmv_64", "hipblasDtrmv_64", "library"); + subst("cublasDtrmv_v2_64", "hipblasDtrmv_64", "library"); + subst("cublasDtrsv_64", "hipblasDtrsv_64", "library"); + subst("cublasDtrsv_v2_64", "hipblasDtrsv_64", "library"); + subst("cublasNrm2Ex_64", "hipblasNrm2Ex_v2_64", "library"); + subst("cublasRotEx_64", "hipblasRotEx_v2_64", "library"); + subst("cublasScalEx_64", "hipblasScalEx_v2_64", "library"); + subst("cublasSgbmv_64", "hipblasSgbmv_64", "library"); + subst("cublasSgbmv_v2_64", "hipblasSgbmv_64", "library"); + subst("cublasSgemvBatched_64", "hipblasSgemvBatched_64", "library"); + subst("cublasSgemvStridedBatched_64", "hipblasSgemvStridedBatched_64", "library"); + subst("cublasSgemv_64", "hipblasSgemv_64", "library"); + subst("cublasSgemv_v2_64", "hipblasSgemv_64", "library"); + subst("cublasSger_64", "hipblasSger_64", "library"); + subst("cublasSger_v2_64", "hipblasSger_64", "library"); + subst("cublasSsbmv_64", "hipblasSsbmv_64", "library"); + subst("cublasSsbmv_v2_64", "hipblasSsbmv_64", "library"); + subst("cublasSspmv_64", "hipblasSspmv_64", "library"); + subst("cublasSspmv_v2_64", "hipblasSspmv_64", "library"); + subst("cublasSspr2_64", "hipblasSspr2_64", "library"); + subst("cublasSspr2_v2_64", "hipblasSspr2_64", "library"); + subst("cublasSspr_64", "hipblasSspr_64", "library"); + subst("cublasSspr_v2_64", "hipblasSspr_64", "library"); + subst("cublasSsymv_64", "hipblasSsymv_64", "library"); + subst("cublasSsymv_v2_64", "hipblasSsymv_64", "library"); + subst("cublasSsyr2_64", "hipblasSsyr2_64", "library"); + subst("cublasSsyr2_v2_64", "hipblasSsyr2_64", "library"); + subst("cublasSsyr_64", "hipblasSsyr_64", "library"); + subst("cublasSsyr_v2_64", "hipblasSsyr_64", "library"); + 
subst("cublasStbmv_64", "hipblasStbmv_64", "library"); + subst("cublasStbmv_v2_64", "hipblasStbmv_64", "library"); + subst("cublasStbsv_64", "hipblasStbsv_64", "library"); + subst("cublasStbsv_v2_64", "hipblasStbsv_64", "library"); + subst("cublasStpmv_64", "hipblasStpmv_64", "library"); + subst("cublasStpmv_v2_64", "hipblasStpmv_64", "library"); + subst("cublasStpsv_64", "hipblasStpsv_64", "library"); + subst("cublasStpsv_v2_64", "hipblasStpsv_64", "library"); + subst("cublasStrmv_64", "hipblasStrmv_64", "library"); + subst("cublasStrmv_v2_64", "hipblasStrmv_64", "library"); + subst("cublasStrsv_64", "hipblasStrsv_64", "library"); + subst("cublasStrsv_v2_64", "hipblasStrsv_64", "library"); + subst("cublasZgbmv_64", "hipblasZgbmv_v2_64", "library"); + subst("cublasZgbmv_v2_64", "hipblasZgbmv_v2_64", "library"); + subst("cublasZgemvBatched_64", "hipblasZgemvBatched_v2_64", "library"); + subst("cublasZgemvStridedBatched_64", "hipblasZgemvStridedBatched_v2_64", "library"); + subst("cublasZgemv_64", "hipblasZgemv_v2_64", "library"); + subst("cublasZgemv_v2_64", "hipblasZgemv_v2_64", "library"); + subst("cublasZgerc_64", "hipblasZgerc_v2_64", "library"); + subst("cublasZgerc_v2_64", "hipblasZgerc_v2_64", "library"); + subst("cublasZgeru_64", "hipblasZgeru_v2_64", "library"); + subst("cublasZgeru_v2_64", "hipblasZgeru_v2_64", "library"); + subst("cublasZhbmv_64", "hipblasZhbmv_v2_64", "library"); + subst("cublasZhbmv_v2_64", "hipblasZhbmv_v2_64", "library"); + subst("cublasZhemv_64", "hipblasZhemv_v2_64", "library"); + subst("cublasZhemv_v2_64", "hipblasZhemv_v2_64", "library"); + subst("cublasZher2_64", "hipblasZher2_v2_64", "library"); + subst("cublasZher2_v2_64", "hipblasZher2_v2_64", "library"); + subst("cublasZher_64", "hipblasZher_v2_64", "library"); + subst("cublasZher_v2_64", "hipblasZher_v2_64", "library"); + subst("cublasZhpmv_64", "hipblasZhpmv_v2_64", "library"); + subst("cublasZhpmv_v2_64", "hipblasZhpmv_v2_64", "library"); + subst("cublasZhpr2_64", "hipblasZhpr2_v2_64", "library"); + subst("cublasZhpr2_v2_64", "hipblasZhpr2_v2_64", "library"); + subst("cublasZhpr_64", "hipblasZhpr_v2_64", "library"); + subst("cublasZhpr_v2_64", "hipblasZhpr_v2_64", "library"); + subst("cublasZsymv_64", "hipblasZsymv_v2_64", "library"); + subst("cublasZsymv_v2_64", "hipblasZsymv_v2_64", "library"); + subst("cublasZsyr2_64", "hipblasZsyr2_v2_64", "library"); + subst("cublasZsyr2_v2_64", "hipblasZsyr2_v2_64", "library"); + subst("cublasZsyr_64", "hipblasZsyr_v2_64", "library"); + subst("cublasZsyr_v2_64", "hipblasZsyr_v2_64", "library"); + subst("cublasZtbmv_64", "hipblasZtbmv_v2_64", "library"); + subst("cublasZtbmv_v2_64", "hipblasZtbmv_v2_64", "library"); + subst("cublasZtbsv_64", "hipblasZtbsv_v2_64", "library"); + subst("cublasZtbsv_v2_64", "hipblasZtbsv_v2_64", "library"); + subst("cublasZtpmv_64", "hipblasZtpmv_v2_64", "library"); + subst("cublasZtpmv_v2_64", "hipblasZtpmv_v2_64", "library"); + subst("cublasZtpsv_64", "hipblasZtpsv_v2_64", "library"); + subst("cublasZtpsv_v2_64", "hipblasZtpsv_v2_64", "library"); + subst("cublasZtrmv_64", "hipblasZtrmv_v2_64", "library"); + subst("cublasZtrmv_v2_64", "hipblasZtrmv_v2_64", "library"); + subst("cublasZtrsv_64", "hipblasZtrsv_v2_64", "library"); + subst("cublasZtrsv_v2_64", "hipblasZtrsv_v2_64", "library"); + subst("curandSetGeneratorOrdering", "hiprandSetGeneratorOrdering", "library"); + subst("cusolverDnCreateParams", "hipsolverDnCreateParams", "library"); + subst("cusolverDnDestroyParams", "hipsolverDnDestroyParams", "library"); + 
subst("cusolverDnSetAdvOptions", "hipsolverDnSetAdvOptions", "library"); + subst("cusolverDnXgetrf", "hipsolverDnXgetrf", "library"); + subst("cusolverDnXgetrf_bufferSize", "hipsolverDnXgetrf_bufferSize", "library"); + subst("cusolverDnXgetrs", "hipsolverDnXgetrs", "library"); + subst("CUdriverProcAddressQueryResult", "hipDriverProcAddressQueryResult", "type"); + subst("CUdriverProcAddressQueryResult_enum", "hipDriverProcAddressQueryResult", "type"); + subst("CUgraphDependencyType", "hipGraphDependencyType", "type"); + subst("CUgraphDependencyType_enum", "hipGraphDependencyType", "type"); + subst("CUgraphEdgeData", "hipGraphEdgeData", "type"); + subst("CUgraphEdgeData_st", "hipGraphEdgeData", "type"); + subst("CUlaunchAttributeID", "hipLaunchAttributeID", "type"); + subst("CUlaunchAttributeID_enum", "hipLaunchAttributeID", "type"); + subst("CUlaunchAttributeValue", "hipLaunchAttributeValue", "type"); + subst("CUlaunchAttributeValue_union", "hipLaunchAttributeValue", "type"); + subst("cudaDriverEntryPointQueryResult", "hipDriverProcAddressQueryResult", "type"); + subst("cudaGraphDependencyType", "hipGraphDependencyType", "type"); + subst("cudaGraphDependencyType_enum", "hipGraphDependencyType", "type"); + subst("cudaGraphEdgeData", "hipGraphEdgeData", "type"); + subst("cudaGraphEdgeData_st", "hipGraphEdgeData", "type"); + subst("cudaLaunchAttributeID", "hipLaunchAttributeID", "type"); + subst("cudaLaunchAttributeValue", "hipLaunchAttributeValue", "type"); + subst("curandOrdering", "hiprandOrdering", "type"); + subst("curandOrdering_t", "hiprandOrdering_t", "type"); + subst("curandStateScrambledSobol32", "hiprandStateScrambledSobol32", "type"); + subst("curandStateScrambledSobol32_t", "hiprandStateScrambledSobol32_t", "type"); + subst("curandStateScrambledSobol64", "hiprandStateScrambledSobol64", "type"); + subst("curandStateScrambledSobol64_t", "hiprandStateScrambledSobol64_t", "type"); + subst("curandStateSobol64", "hiprandStateSobol64", "type"); + subst("curandStateSobol64_t", "hiprandStateSobol64_t", "type"); + subst("cusolverAlgMode_t", "hipsolverAlgMode_t", "type"); + subst("cusolverDnFunction_t", "hipsolverDnFunction_t", "type"); + subst("cusolverDnParams_t", "hipsolverDnParams_t", "type"); + subst("CUBLASLT_MATMUL_DESC_AMAX_D_POINTER", "HIPBLASLT_MATMUL_DESC_AMAX_D_POINTER", "numeric_literal"); + subst("CURAND_ORDERING_PSEUDO_BEST", "HIPRAND_ORDERING_PSEUDO_BEST", "numeric_literal"); + subst("CURAND_ORDERING_PSEUDO_DEFAULT", "HIPRAND_ORDERING_PSEUDO_DEFAULT", "numeric_literal"); + subst("CURAND_ORDERING_PSEUDO_DYNAMIC", "HIPRAND_ORDERING_PSEUDO_DYNAMIC", "numeric_literal"); + subst("CURAND_ORDERING_PSEUDO_LEGACY", "HIPRAND_ORDERING_PSEUDO_LEGACY", "numeric_literal"); + subst("CURAND_ORDERING_PSEUDO_SEEDED", "HIPRAND_ORDERING_PSEUDO_SEEDED", "numeric_literal"); + subst("CURAND_ORDERING_QUASI_DEFAULT", "HIPRAND_ORDERING_QUASI_DEFAULT", "numeric_literal"); + subst("CUSOLVERDN_GETRF", "HIPSOLVERDN_GETRF", "numeric_literal"); + subst("CUSOLVER_ALG_0", "HIPSOLVER_ALG_0", "numeric_literal"); + subst("CUSOLVER_ALG_1", "HIPSOLVER_ALG_1", "numeric_literal"); + subst("CU_GET_PROC_ADDRESS_SUCCESS", "HIP_GET_PROC_ADDRESS_SUCCESS", "numeric_literal"); + subst("CU_GET_PROC_ADDRESS_SYMBOL_NOT_FOUND", "HIP_GET_PROC_ADDRESS_SYMBOL_NOT_FOUND", "numeric_literal"); + subst("CU_GET_PROC_ADDRESS_VERSION_NOT_SUFFICIENT", "HIP_GET_PROC_ADDRESS_VERSION_NOT_SUFFICIENT", "numeric_literal"); + subst("CU_GRAPH_DEPENDENCY_TYPE_DEFAULT", "hipGraphDependencyTypeDefault", "numeric_literal"); + 
subst("CU_GRAPH_DEPENDENCY_TYPE_PROGRAMMATIC", "hipGraphDependencyTypeProgrammatic", "numeric_literal"); + subst("CU_KERNEL_NODE_ATTRIBUTE_PRIORITY", "hipKernelNodeAttributePriority", "numeric_literal"); + subst("CU_LAUNCH_ATTRIBUTE_ACCESS_POLICY_WINDOW", "hipLaunchAttributeAccessPolicyWindow", "numeric_literal"); + subst("CU_LAUNCH_ATTRIBUTE_COOPERATIVE", "hipLaunchAttributeCooperative", "numeric_literal"); + subst("CU_LAUNCH_ATTRIBUTE_PRIORITY", "hipLaunchAttributePriority", "numeric_literal"); + subst("cudaDriverEntryPointSuccess", "HIP_GET_PROC_ADDRESS_SUCCESS", "numeric_literal"); + subst("cudaDriverEntryPointSymbolNotFound", "HIP_GET_PROC_ADDRESS_SYMBOL_NOT_FOUND", "numeric_literal"); + subst("cudaDriverEntryPointVersionNotSufficent", "HIP_GET_PROC_ADDRESS_VERSION_NOT_SUFFICIENT", "numeric_literal"); + subst("cudaGraphDependencyTypeDefault", "hipGraphDependencyTypeDefault", "numeric_literal"); + subst("cudaGraphDependencyTypeProgrammatic", "hipGraphDependencyTypeProgrammatic", "numeric_literal"); + subst("cudaKernelNodeAttributePriority", "hipKernelNodeAttributePriority", "numeric_literal"); + subst("cudaLaunchAttributeAccessPolicyWindow", "hipLaunchAttributeAccessPolicyWindow", "numeric_literal"); + subst("cudaLaunchAttributeCooperative", "hipLaunchAttributeCooperative", "numeric_literal"); + subst("cudaLaunchAttributePriority", "hipLaunchAttributePriority", "numeric_literal"); + subst("CU_GRAPH_KERNEL_NODE_PORT_DEFAULT", "hipGraphKernelNodePortDefault", "define"); + subst("CU_GRAPH_KERNEL_NODE_PORT_LAUNCH_ORDER", "hipGraphKernelNodePortLaunchCompletion", "define"); + subst("CU_GRAPH_KERNEL_NODE_PORT_PROGRAMMATIC", "hipGraphKernelNodePortProgrammatic", "define"); + subst("CU_STREAM_LEGACY", "hipStreamLegacy", "define"); + subst("cudaGraphKernelNodePortDefault", "hipGraphKernelNodePortDefault", "define"); + subst("cudaGraphKernelNodePortLaunchCompletion", "hipGraphKernelNodePortLaunchCompletion", "define"); + subst("cudaGraphKernelNodePortProgrammatic", "hipGraphKernelNodePortProgrammatic", "define"); + subst("cudaStreamLegacy", "hipStreamLegacy", "define"); +} + +sub rocSubstitutions { + subst("cublasAxpyEx", "rocblas_axpy_ex", "library"); + subst("cublasAxpyEx_64", "rocblas_axpy_ex_64", "library"); + subst("cublasCaxpy", "rocblas_caxpy", "library"); + subst("cublasCaxpy_64", "rocblas_caxpy_64", "library"); + subst("cublasCaxpy_v2", "rocblas_caxpy", "library"); + subst("cublasCaxpy_v2_64", "rocblas_caxpy_64", "library"); + subst("cublasCcopy", "rocblas_ccopy", "library"); + subst("cublasCcopy_64", "rocblas_ccopy_64", "library"); + subst("cublasCcopy_v2", "rocblas_ccopy", "library"); + subst("cublasCcopy_v2_64", "rocblas_ccopy_64", "library"); + subst("cublasCdgmm", "rocblas_cdgmm", "library"); + subst("cublasCdotc", "rocblas_cdotc", "library"); + subst("cublasCdotc_64", "rocblas_cdotc_64", "library"); + subst("cublasCdotc_v2", "rocblas_cdotc", "library"); + subst("cublasCdotc_v2_64", "rocblas_cdotc_64", "library"); + subst("cublasCdotu", "rocblas_cdotu", "library"); + subst("cublasCdotu_64", "rocblas_cdotu_64", "library"); + subst("cublasCdotu_v2", "rocblas_cdotu", "library"); + subst("cublasCdotu_v2_64", "rocblas_cdotu_64", "library"); + subst("cublasCgbmv", "rocblas_cgbmv", "library"); + subst("cublasCgbmv_v2", "rocblas_cgbmv", "library"); + subst("cublasCgeam", "rocblas_cgeam", "library"); + subst("cublasCgemm", "rocblas_cgemm", "library"); + subst("cublasCgemmBatched", "rocblas_cgemm_batched", "library"); + subst("cublasCgemmStridedBatched", "rocblas_cgemm_strided_batched", 
"library"); + subst("cublasCgemm_v2", "rocblas_cgemm", "library"); + subst("cublasCgemv", "rocblas_cgemv", "library"); + subst("cublasCgemvBatched", "rocblas_cgemv_batched", "library"); + subst("cublasCgemvStridedBatched", "rocblas_cgemv_strided_batched", "library"); + subst("cublasCgemv_v2", "rocblas_cgemv", "library"); + subst("cublasCgerc", "rocblas_cgerc", "library"); + subst("cublasCgerc_v2", "rocblas_cgerc", "library"); + subst("cublasCgeru", "rocblas_cgeru", "library"); + subst("cublasCgeru_v2", "rocblas_cgeru", "library"); + subst("cublasChbmv", "rocblas_chbmv", "library"); + subst("cublasChbmv_v2", "rocblas_chbmv", "library"); + subst("cublasChemm", "rocblas_chemm", "library"); + subst("cublasChemm_v2", "rocblas_chemm", "library"); + subst("cublasChemv", "rocblas_chemv", "library"); + subst("cublasChemv_v2", "rocblas_chemv", "library"); + subst("cublasCher", "rocblas_cher", "library"); + subst("cublasCher2", "rocblas_cher2", "library"); + subst("cublasCher2_v2", "rocblas_cher2", "library"); + subst("cublasCher2k", "rocblas_cher2k", "library"); + subst("cublasCher2k_v2", "rocblas_cher2k", "library"); + subst("cublasCher_v2", "rocblas_cher", "library"); + subst("cublasCherk", "rocblas_cherk", "library"); + subst("cublasCherk_v2", "rocblas_cherk", "library"); + subst("cublasCherkx", "rocblas_cherkx", "library"); + subst("cublasChpmv", "rocblas_chpmv", "library"); + subst("cublasChpmv_v2", "rocblas_chpmv", "library"); + subst("cublasChpr", "rocblas_chpr", "library"); + subst("cublasChpr2", "rocblas_chpr2", "library"); + subst("cublasChpr2_v2", "rocblas_chpr2", "library"); + subst("cublasChpr_v2", "rocblas_chpr", "library"); + subst("cublasCreate", "rocblas_create_handle", "library"); + subst("cublasCreate_v2", "rocblas_create_handle", "library"); + subst("cublasCrot", "rocblas_crot", "library"); + subst("cublasCrot_64", "rocblas_crot_64", "library"); + subst("cublasCrot_v2", "rocblas_crot", "library"); + subst("cublasCrot_v2_64", "rocblas_crot_64", "library"); + subst("cublasCrotg", "rocblas_crotg", "library"); + subst("cublasCrotg_v2", "rocblas_crotg", "library"); + subst("cublasCscal", "rocblas_cscal", "library"); + subst("cublasCscal_64", "rocblas_cscal_64", "library"); + subst("cublasCscal_v2", "rocblas_cscal", "library"); + subst("cublasCscal_v2_64", "rocblas_cscal_64", "library"); + subst("cublasCsrot", "rocblas_csrot", "library"); + subst("cublasCsrot_64", "rocblas_csrot_64", "library"); + subst("cublasCsrot_v2", "rocblas_csrot", "library"); + subst("cublasCsrot_v2_64", "rocblas_csrot_64", "library"); + subst("cublasCsscal", "rocblas_csscal", "library"); + subst("cublasCsscal_64", "rocblas_csscal_64", "library"); + subst("cublasCsscal_v2", "rocblas_csscal", "library"); + subst("cublasCsscal_v2_64", "rocblas_csscal_64", "library"); + subst("cublasCswap", "rocblas_cswap", "library"); + subst("cublasCswap_64", "rocblas_cswap_64", "library"); + subst("cublasCswap_v2", "rocblas_cswap", "library"); + subst("cublasCswap_v2_64", "rocblas_cswap_64", "library"); + subst("cublasCsymm", "rocblas_csymm", "library"); + subst("cublasCsymm_v2", "rocblas_csymm", "library"); + subst("cublasCsymv", "rocblas_csymv", "library"); + subst("cublasCsymv_v2", "rocblas_csymv", "library"); + subst("cublasCsyr", "rocblas_csyr", "library"); + subst("cublasCsyr2", "rocblas_csyr2", "library"); + subst("cublasCsyr2_v2", "rocblas_csyr2", "library"); + subst("cublasCsyr2k", "rocblas_csyr2k", "library"); + subst("cublasCsyr2k_v2", "rocblas_csyr2k", "library"); + subst("cublasCsyr_v2", "rocblas_csyr", "library"); 
+ subst("cublasCsyrk", "rocblas_csyrk", "library"); + subst("cublasCsyrk_v2", "rocblas_csyrk", "library"); + subst("cublasCsyrkx", "rocblas_csyrkx", "library"); + subst("cublasCtbmv", "rocblas_ctbmv", "library"); + subst("cublasCtbmv_v2", "rocblas_ctbmv", "library"); + subst("cublasCtbsv", "rocblas_ctbsv", "library"); + subst("cublasCtbsv_v2", "rocblas_ctbsv", "library"); + subst("cublasCtpmv", "rocblas_ctpmv", "library"); + subst("cublasCtpmv_v2", "rocblas_ctpmv", "library"); + subst("cublasCtpsv", "rocblas_ctpsv", "library"); + subst("cublasCtpsv_v2", "rocblas_ctpsv", "library"); + subst("cublasCtrmm", "rocblas_ctrmm", "library"); + subst("cublasCtrmm_v2", "rocblas_ctrmm", "library"); + subst("cublasCtrmv", "rocblas_ctrmv", "library"); + subst("cublasCtrmv_v2", "rocblas_ctrmv", "library"); + subst("cublasCtrsm", "rocblas_ctrsm", "library"); + subst("cublasCtrsmBatched", "rocblas_ctrsm_batched", "library"); + subst("cublasCtrsm_v2", "rocblas_ctrsm", "library"); + subst("cublasCtrsv", "rocblas_ctrsv", "library"); + subst("cublasCtrsv_v2", "rocblas_ctrsv", "library"); + subst("cublasDasum", "rocblas_dasum", "library"); + subst("cublasDasum_64", "rocblas_dasum_64", "library"); + subst("cublasDasum_v2", "rocblas_dasum", "library"); + subst("cublasDasum_v2_64", "rocblas_dasum_64", "library"); + subst("cublasDaxpy", "rocblas_daxpy", "library"); + subst("cublasDaxpy_64", "rocblas_daxpy_64", "library"); + subst("cublasDaxpy_v2", "rocblas_daxpy", "library"); + subst("cublasDaxpy_v2_64", "rocblas_daxpy_64", "library"); + subst("cublasDcopy", "rocblas_dcopy", "library"); + subst("cublasDcopy_64", "rocblas_dcopy_64", "library"); + subst("cublasDcopy_v2", "rocblas_dcopy", "library"); + subst("cublasDcopy_v2_64", "rocblas_dcopy_64", "library"); + subst("cublasDdgmm", "rocblas_ddgmm", "library"); + subst("cublasDdot", "rocblas_ddot", "library"); + subst("cublasDdot_64", "rocblas_ddot_64", "library"); + subst("cublasDdot_v2", "rocblas_ddot", "library"); + subst("cublasDdot_v2_64", "rocblas_ddot_64", "library"); + subst("cublasDestroy", "rocblas_destroy_handle", "library"); + subst("cublasDestroy_v2", "rocblas_destroy_handle", "library"); + subst("cublasDgbmv", "rocblas_dgbmv", "library"); + subst("cublasDgbmv_v2", "rocblas_dgbmv", "library"); + subst("cublasDgeam", "rocblas_dgeam", "library"); + subst("cublasDgemm", "rocblas_dgemm", "library"); + subst("cublasDgemmBatched", "rocblas_dgemm_batched", "library"); + subst("cublasDgemmStridedBatched", "rocblas_dgemm_strided_batched", "library"); + subst("cublasDgemm_v2", "rocblas_dgemm", "library"); + subst("cublasDgemv", "rocblas_dgemv", "library"); + subst("cublasDgemv_v2", "rocblas_dgemv", "library"); + subst("cublasDger", "rocblas_dger", "library"); + subst("cublasDger_v2", "rocblas_dger", "library"); + subst("cublasDnrm2", "rocblas_dnrm2", "library"); + subst("cublasDnrm2_64", "rocblas_dnrm2_64", "library"); + subst("cublasDnrm2_v2", "rocblas_dnrm2", "library"); + subst("cublasDnrm2_v2_64", "rocblas_dnrm2_64", "library"); + subst("cublasDotEx", "rocblas_dot_ex", "library"); + subst("cublasDotEx_64", "rocblas_dot_ex_64", "library"); + subst("cublasDotcEx", "rocblas_dotc_ex", "library"); + subst("cublasDotcEx_64", "rocblas_dotc_ex_64", "library"); + subst("cublasDrot", "rocblas_drot", "library"); + subst("cublasDrot_64", "rocblas_drot_64", "library"); + subst("cublasDrot_v2", "rocblas_drot", "library"); + subst("cublasDrot_v2_64", "rocblas_drot_64", "library"); + subst("cublasDrotg", "rocblas_drotg", "library"); + subst("cublasDrotg_v2", "rocblas_drotg", 
"library"); + subst("cublasDrotm", "rocblas_drotm", "library"); + subst("cublasDrotm_64", "rocblas_drotm_64", "library"); + subst("cublasDrotm_v2", "rocblas_drotm", "library"); + subst("cublasDrotm_v2_64", "rocblas_drotm_64", "library"); + subst("cublasDrotmg", "rocblas_drotmg", "library"); + subst("cublasDrotmg_v2", "rocblas_drotmg", "library"); + subst("cublasDsbmv", "rocblas_dsbmv", "library"); + subst("cublasDsbmv_v2", "rocblas_dsbmv", "library"); + subst("cublasDscal", "rocblas_dscal", "library"); + subst("cublasDscal_64", "rocblas_dscal_64", "library"); + subst("cublasDscal_v2", "rocblas_dscal", "library"); + subst("cublasDscal_v2_64", "rocblas_dscal_64", "library"); + subst("cublasDspmv", "rocblas_dspmv", "library"); + subst("cublasDspmv_v2", "rocblas_dspmv", "library"); + subst("cublasDspr", "rocblas_dspr", "library"); + subst("cublasDspr2", "rocblas_dspr2", "library"); + subst("cublasDspr2_v2", "rocblas_dspr2", "library"); + subst("cublasDspr_v2", "rocblas_dspr", "library"); + subst("cublasDswap", "rocblas_dswap", "library"); + subst("cublasDswap_64", "rocblas_dswap_64", "library"); + subst("cublasDswap_v2", "rocblas_dswap", "library"); + subst("cublasDswap_v2_64", "rocblas_dswap_64", "library"); + subst("cublasDsymm", "rocblas_dsymm", "library"); + subst("cublasDsymm_v2", "rocblas_dsymm", "library"); + subst("cublasDsymv", "rocblas_dsymv", "library"); + subst("cublasDsymv_v2", "rocblas_dsymv", "library"); + subst("cublasDsyr", "rocblas_dsyr", "library"); + subst("cublasDsyr2", "rocblas_dsyr2", "library"); + subst("cublasDsyr2_v2", "rocblas_dsyr2", "library"); + subst("cublasDsyr2k", "rocblas_dsyr2k", "library"); + subst("cublasDsyr2k_v2", "rocblas_dsyr2k", "library"); + subst("cublasDsyr_v2", "rocblas_dsyr", "library"); + subst("cublasDsyrk", "rocblas_dsyrk", "library"); + subst("cublasDsyrk_v2", "rocblas_dsyrk", "library"); + subst("cublasDsyrkx", "rocblas_dsyrkx", "library"); + subst("cublasDtbmv", "rocblas_dtbmv", "library"); + subst("cublasDtbmv_v2", "rocblas_dtbmv", "library"); + subst("cublasDtbsv", "rocblas_dtbsv", "library"); + subst("cublasDtbsv_v2", "rocblas_dtbsv", "library"); + subst("cublasDtpmv", "rocblas_dtpmv", "library"); + subst("cublasDtpmv_v2", "rocblas_dtpmv", "library"); + subst("cublasDtpsv", "rocblas_dtpsv", "library"); + subst("cublasDtpsv_v2", "rocblas_dtpsv", "library"); + subst("cublasDtrmm", "rocblas_dtrmm", "library"); + subst("cublasDtrmm_v2", "rocblas_dtrmm", "library"); + subst("cublasDtrmv", "rocblas_dtrmv", "library"); + subst("cublasDtrmv_v2", "rocblas_dtrmv", "library"); + subst("cublasDtrsm", "rocblas_dtrsm", "library"); + subst("cublasDtrsmBatched", "rocblas_dtrsm_batched", "library"); + subst("cublasDtrsm_v2", "rocblas_dtrsm", "library"); + subst("cublasDtrsv", "rocblas_dtrsv", "library"); + subst("cublasDtrsv_v2", "rocblas_dtrsv", "library"); + subst("cublasDzasum", "rocblas_dzasum", "library"); + subst("cublasDzasum_64", "rocblas_dzasum_64", "library"); + subst("cublasDzasum_v2", "rocblas_dzasum", "library"); + subst("cublasDzasum_v2_64", "rocblas_dzasum_64", "library"); + subst("cublasDznrm2", "rocblas_dznrm2", "library"); + subst("cublasDznrm2_64", "rocblas_dznrm2_64", "library"); + subst("cublasDznrm2_v2", "rocblas_dznrm2", "library"); + subst("cublasDznrm2_v2_64", "rocblas_dznrm2_64", "library"); + subst("cublasGemmBatchedEx", "rocblas_gemm_batched_ex", "library"); + subst("cublasGemmEx", "rocblas_gemm_ex", "library"); + subst("cublasGemmStridedBatchedEx", "rocblas_gemm_strided_batched_ex", "library"); + subst("cublasGetAtomicsMode", 
"rocblas_get_atomics_mode", "library"); + subst("cublasGetMathMode", "rocblas_get_math_mode", "library"); + subst("cublasGetMatrix", "rocblas_get_matrix", "library"); + subst("cublasGetMatrixAsync", "rocblas_get_matrix_async", "library"); + subst("cublasGetPointerMode", "rocblas_get_pointer_mode", "library"); + subst("cublasGetPointerMode_v2", "rocblas_get_pointer_mode", "library"); + subst("cublasGetStatusString", "rocblas_status_to_string", "library"); + subst("cublasGetStream", "rocblas_get_stream", "library"); + subst("cublasGetStream_v2", "rocblas_get_stream", "library"); + subst("cublasGetVector", "rocblas_get_vector", "library"); + subst("cublasGetVectorAsync", "rocblas_get_vector_async", "library"); + subst("cublasHSHgemvBatched", "rocblas_hshgemv_batched", "library"); + subst("cublasHSHgemvStridedBatched", "rocblas_hshgemv_strided_batched", "library"); + subst("cublasHSSgemvBatched", "rocblas_hssgemv_batched", "library"); + subst("cublasHSSgemvStridedBatched", "rocblas_hssgemv_strided_batched", "library"); + subst("cublasHgemm", "rocblas_hgemm", "library"); + subst("cublasHgemmBatched", "rocblas_hgemm_batched", "library"); + subst("cublasHgemmStridedBatched", "rocblas_hgemm_strided_batched", "library"); + subst("cublasIcamax", "rocblas_icamax", "library"); + subst("cublasIcamax_64", "rocblas_icamax_64", "library"); + subst("cublasIcamax_v2", "rocblas_icamax", "library"); + subst("cublasIcamax_v2_64", "rocblas_icamax_64", "library"); + subst("cublasIcamin", "rocblas_icamin", "library"); + subst("cublasIcamin_64", "rocblas_icamin_64", "library"); + subst("cublasIcamin_v2", "rocblas_icamin", "library"); + subst("cublasIcamin_v2_64", "rocblas_icamin_64", "library"); + subst("cublasIdamax", "rocblas_idamax", "library"); + subst("cublasIdamax_64", "rocblas_idamax_64", "library"); + subst("cublasIdamax_v2", "rocblas_idamax", "library"); + subst("cublasIdamax_v2_64", "rocblas_idamax_64", "library"); + subst("cublasIdamin", "rocblas_idamin", "library"); + subst("cublasIdamin_64", "rocblas_idamin_64", "library"); + subst("cublasIdamin_v2", "rocblas_idamin", "library"); + subst("cublasIdamin_v2_64", "rocblas_idamin_64", "library"); + subst("cublasInit", "rocblas_initialize", "library"); + subst("cublasIsamax", "rocblas_isamax", "library"); + subst("cublasIsamax_64", "rocblas_isamax_64", "library"); + subst("cublasIsamax_v2", "rocblas_isamax", "library"); + subst("cublasIsamax_v2_64", "rocblas_isamax_64", "library"); + subst("cublasIsamin", "rocblas_isamin", "library"); + subst("cublasIsamin_64", "rocblas_isamin_64", "library"); + subst("cublasIsamin_v2", "rocblas_isamin", "library"); + subst("cublasIsamin_v2_64", "rocblas_isamin_64", "library"); + subst("cublasIzamax", "rocblas_izamax", "library"); + subst("cublasIzamax_64", "rocblas_izamax_64", "library"); + subst("cublasIzamax_v2", "rocblas_izamax", "library"); + subst("cublasIzamax_v2_64", "rocblas_izamax_64", "library"); + subst("cublasIzamin", "rocblas_izamin", "library"); + subst("cublasIzamin_64", "rocblas_izamin_64", "library"); + subst("cublasIzamin_v2", "rocblas_izamin", "library"); + subst("cublasIzamin_v2_64", "rocblas_izamin_64", "library"); + subst("cublasNrm2Ex", "rocblas_nrm2_ex", "library"); + subst("cublasNrm2Ex_64", "rocblas_nrm2_ex_64", "library"); + subst("cublasRotEx", "rocblas_rot_ex", "library"); + subst("cublasRotEx_64", "rocblas_rot_ex_64", "library"); + subst("cublasSasum", "rocblas_sasum", "library"); + subst("cublasSasum_64", "rocblas_sasum_64", "library"); + subst("cublasSasum_v2", "rocblas_sasum", "library"); + 
subst("cublasSasum_v2_64", "rocblas_sasum_64", "library"); + subst("cublasSaxpy", "rocblas_saxpy", "library"); + subst("cublasSaxpy_64", "rocblas_saxpy_64", "library"); + subst("cublasSaxpy_v2", "rocblas_saxpy", "library"); + subst("cublasSaxpy_v2_64", "rocblas_saxpy_64", "library"); + subst("cublasScalEx", "rocblas_scal_ex", "library"); + subst("cublasScalEx_64", "rocblas_scal_ex_64", "library"); + subst("cublasScasum", "rocblas_scasum", "library"); + subst("cublasScasum_64", "rocblas_scasum_64", "library"); + subst("cublasScasum_v2", "rocblas_scasum", "library"); + subst("cublasScasum_v2_64", "rocblas_scasum_64", "library"); + subst("cublasScnrm2", "rocblas_scnrm2", "library"); + subst("cublasScnrm2_64", "rocblas_scnrm2_64", "library"); + subst("cublasScnrm2_v2", "rocblas_scnrm2", "library"); + subst("cublasScnrm2_v2_64", "rocblas_scnrm2_64", "library"); + subst("cublasScopy", "rocblas_scopy", "library"); + subst("cublasScopy_64", "rocblas_scopy_64", "library"); + subst("cublasScopy_v2", "rocblas_scopy", "library"); + subst("cublasScopy_v2_64", "rocblas_scopy_64", "library"); + subst("cublasSdgmm", "rocblas_sdgmm", "library"); + subst("cublasSdot", "rocblas_sdot", "library"); + subst("cublasSdot_64", "rocblas_sdot_64", "library"); + subst("cublasSdot_v2", "rocblas_sdot", "library"); + subst("cublasSdot_v2_64", "rocblas_sdot_64", "library"); + subst("cublasSetAtomicsMode", "rocblas_set_atomics_mode", "library"); + subst("cublasSetMathMode", "rocblas_set_math_mode", "library"); + subst("cublasSetMatrix", "rocblas_set_matrix", "library"); + subst("cublasSetMatrixAsync", "rocblas_set_matrix_async", "library"); + subst("cublasSetPointerMode", "rocblas_set_pointer_mode", "library"); + subst("cublasSetPointerMode_v2", "rocblas_set_pointer_mode", "library"); + subst("cublasSetStream", "rocblas_set_stream", "library"); + subst("cublasSetStream_v2", "rocblas_set_stream", "library"); + subst("cublasSetVector", "rocblas_set_vector", "library"); + subst("cublasSetVectorAsync", "rocblas_set_vector_async", "library"); + subst("cublasSgbmv", "rocblas_sgbmv", "library"); + subst("cublasSgbmv_v2", "rocblas_sgbmv", "library"); + subst("cublasSgeam", "rocblas_sgeam", "library"); + subst("cublasSgemm", "rocblas_sgemm", "library"); + subst("cublasSgemmBatched", "rocblas_sgemm_batched", "library"); + subst("cublasSgemmStridedBatched", "rocblas_sgemm_strided_batched", "library"); + subst("cublasSgemm_v2", "rocblas_sgemm", "library"); + subst("cublasSgemv", "rocblas_sgemv", "library"); + subst("cublasSgemv_v2", "rocblas_sgemv", "library"); + subst("cublasSger", "rocblas_sger", "library"); + subst("cublasSger_v2", "rocblas_sger", "library"); + subst("cublasSnrm2", "rocblas_snrm2", "library"); + subst("cublasSnrm2_64", "rocblas_snrm2_64", "library"); + subst("cublasSnrm2_v2", "rocblas_snrm2", "library"); + subst("cublasSnrm2_v2_64", "rocblas_snrm2_64", "library"); + subst("cublasSrot", "rocblas_srot", "library"); + subst("cublasSrot_64", "rocblas_srot_64", "library"); + subst("cublasSrot_v2", "rocblas_srot", "library"); + subst("cublasSrot_v2_64", "rocblas_srot_64", "library"); + subst("cublasSrotg", "rocblas_srotg", "library"); + subst("cublasSrotg_v2", "rocblas_srotg", "library"); + subst("cublasSrotm", "rocblas_srotm", "library"); + subst("cublasSrotm_64", "rocblas_srotm_64", "library"); + subst("cublasSrotm_v2", "rocblas_srotm", "library"); + subst("cublasSrotm_v2_64", "rocblas_srotm_64", "library"); + subst("cublasSrotmg", "rocblas_srotmg", "library"); + subst("cublasSrotmg_v2", "rocblas_srotmg", "library"); 
+ subst("cublasSsbmv", "rocblas_ssbmv", "library"); + subst("cublasSsbmv_v2", "rocblas_ssbmv", "library"); + subst("cublasSscal", "rocblas_sscal", "library"); + subst("cublasSscal_64", "rocblas_sscal_64", "library"); + subst("cublasSscal_v2", "rocblas_sscal", "library"); + subst("cublasSscal_v2_64", "rocblas_sscal_64", "library"); + subst("cublasSspmv", "rocblas_sspmv", "library"); + subst("cublasSspmv_v2", "rocblas_sspmv", "library"); + subst("cublasSspr", "rocblas_sspr", "library"); + subst("cublasSspr2", "rocblas_sspr2", "library"); + subst("cublasSspr2_v2", "rocblas_sspr2", "library"); + subst("cublasSspr_v2", "rocblas_sspr", "library"); + subst("cublasSswap", "rocblas_sswap", "library"); + subst("cublasSswap_64", "rocblas_sswap_64", "library"); + subst("cublasSswap_v2", "rocblas_sswap", "library"); + subst("cublasSswap_v2_64", "rocblas_sswap_64", "library"); + subst("cublasSsymm", "rocblas_ssymm", "library"); + subst("cublasSsymm_v2", "rocblas_ssymm", "library"); + subst("cublasSsymv", "rocblas_ssymv", "library"); + subst("cublasSsymv_v2", "rocblas_ssymv", "library"); + subst("cublasSsyr", "rocblas_ssyr", "library"); + subst("cublasSsyr2", "rocblas_ssyr2", "library"); + subst("cublasSsyr2_v2", "rocblas_ssyr2", "library"); + subst("cublasSsyr2k", "rocblas_ssyr2k", "library"); + subst("cublasSsyr2k_v2", "rocblas_ssyr2k", "library"); + subst("cublasSsyr_v2", "rocblas_ssyr", "library"); + subst("cublasSsyrk", "rocblas_ssyrk", "library"); + subst("cublasSsyrk_v2", "rocblas_ssyrk", "library"); + subst("cublasSsyrkx", "rocblas_ssyrkx", "library"); + subst("cublasStbmv", "rocblas_stbmv", "library"); + subst("cublasStbmv_v2", "rocblas_stbmv", "library"); + subst("cublasStbsv", "rocblas_stbsv", "library"); + subst("cublasStbsv_v2", "rocblas_stbsv", "library"); + subst("cublasStpmv", "rocblas_stpmv", "library"); + subst("cublasStpmv_v2", "rocblas_stpmv", "library"); + subst("cublasStpsv", "rocblas_stpsv", "library"); + subst("cublasStpsv_v2", "rocblas_stpsv", "library"); + subst("cublasStrmm", "rocblas_strmm", "library"); + subst("cublasStrmm_v2", "rocblas_strmm", "library"); + subst("cublasStrmv", "rocblas_strmv", "library"); + subst("cublasStrmv_v2", "rocblas_strmv", "library"); + subst("cublasStrsm", "rocblas_strsm", "library"); + subst("cublasStrsmBatched", "rocblas_strsm_batched", "library"); + subst("cublasStrsm_v2", "rocblas_strsm", "library"); + subst("cublasStrsv", "rocblas_strsv", "library"); + subst("cublasStrsv_v2", "rocblas_strsv", "library"); + subst("cublasTSSgemvBatched", "rocblas_tssgemv_batched", "library"); + subst("cublasTSSgemvStridedBatched", "rocblas_tssgemv_strided_batched", "library"); + subst("cublasTSTgemvBatched", "rocblas_tstgemv_batched", "library"); + subst("cublasTSTgemvStridedBatched", "rocblas_tstgemv_strided_batched", "library"); + subst("cublasZaxpy", "rocblas_zaxpy", "library"); + subst("cublasZaxpy_64", "rocblas_zaxpy_64", "library"); + subst("cublasZaxpy_v2", "rocblas_zaxpy", "library"); + subst("cublasZaxpy_v2_64", "rocblas_zaxpy_64", "library"); + subst("cublasZcopy", "rocblas_zcopy", "library"); + subst("cublasZcopy_64", "rocblas_zcopy_64", "library"); + subst("cublasZcopy_v2", "rocblas_zcopy", "library"); + subst("cublasZcopy_v2_64", "rocblas_zcopy_64", "library"); + subst("cublasZdgmm", "rocblas_zdgmm", "library"); + subst("cublasZdotc", "rocblas_zdotc", "library"); + subst("cublasZdotc_64", "rocblas_zdotc_64", "library"); + subst("cublasZdotc_v2", "rocblas_zdotc", "library"); + subst("cublasZdotc_v2_64", "rocblas_zdotc_64", "library"); + 
subst("cublasZdotu", "rocblas_zdotu", "library"); + subst("cublasZdotu_64", "rocblas_zdotu_64", "library"); + subst("cublasZdotu_v2", "rocblas_zdotu", "library"); + subst("cublasZdotu_v2_64", "rocblas_zdotu_64", "library"); + subst("cublasZdrot", "rocblas_zdrot", "library"); + subst("cublasZdrot_64", "rocblas_zdrot_64", "library"); + subst("cublasZdrot_v2", "rocblas_zdrot", "library"); + subst("cublasZdrot_v2_64", "rocblas_zdrot_64", "library"); + subst("cublasZdscal", "rocblas_zdscal", "library"); + subst("cublasZdscal_64", "rocblas_zdscal_64", "library"); + subst("cublasZdscal_v2", "rocblas_zdscal", "library"); + subst("cublasZdscal_v2_64", "rocblas_zdscal_64", "library"); + subst("cublasZgbmv", "rocblas_zgbmv", "library"); + subst("cublasZgbmv_v2", "rocblas_zgbmv", "library"); + subst("cublasZgeam", "rocblas_zgeam", "library"); + subst("cublasZgemm", "rocblas_zgemm", "library"); + subst("cublasZgemmBatched", "rocblas_zgemm_batched", "library"); + subst("cublasZgemmStridedBatched", "rocblas_zgemm_strided_batched", "library"); + subst("cublasZgemm_v2", "rocblas_zgemm", "library"); + subst("cublasZgemv", "rocblas_zgemv", "library"); + subst("cublasZgemvBatched", "rocblas_zgemv_batched", "library"); + subst("cublasZgemvStridedBatched", "rocblas_zgemv_strided_batched", "library"); + subst("cublasZgemv_v2", "rocblas_zgemv", "library"); + subst("cublasZgerc", "rocblas_zgerc", "library"); + subst("cublasZgerc_v2", "rocblas_zgerc", "library"); + subst("cublasZgeru", "rocblas_zgeru", "library"); + subst("cublasZgeru_v2", "rocblas_zgeru", "library"); + subst("cublasZhbmv", "rocblas_zhbmv", "library"); + subst("cublasZhbmv_v2", "rocblas_zhbmv", "library"); + subst("cublasZhemm", "rocblas_zhemm", "library"); + subst("cublasZhemm_v2", "rocblas_zhemm", "library"); + subst("cublasZhemv", "rocblas_zhemv", "library"); + subst("cublasZhemv_v2", "rocblas_zhemv", "library"); + subst("cublasZher", "rocblas_zher", "library"); + subst("cublasZher2", "rocblas_zher2", "library"); + subst("cublasZher2_v2", "rocblas_zher2", "library"); + subst("cublasZher2k", "rocblas_zher2k", "library"); + subst("cublasZher2k_v2", "rocblas_zher2k", "library"); + subst("cublasZher_v2", "rocblas_zher", "library"); + subst("cublasZherk", "rocblas_zherk", "library"); + subst("cublasZherk_v2", "rocblas_zherk", "library"); + subst("cublasZherkx", "rocblas_zherkx", "library"); + subst("cublasZhpmv", "rocblas_zhpmv", "library"); + subst("cublasZhpmv_v2", "rocblas_zhpmv", "library"); + subst("cublasZhpr", "rocblas_zhpr", "library"); + subst("cublasZhpr2", "rocblas_zhpr2", "library"); + subst("cublasZhpr2_v2", "rocblas_zhpr2", "library"); + subst("cublasZhpr_v2", "rocblas_zhpr", "library"); + subst("cublasZrot", "rocblas_zrot", "library"); + subst("cublasZrot_64", "rocblas_zrot_64", "library"); + subst("cublasZrot_v2", "rocblas_zrot", "library"); + subst("cublasZrot_v2_64", "rocblas_zrot_64", "library"); + subst("cublasZrotg", "rocblas_zrotg", "library"); + subst("cublasZrotg_v2", "rocblas_zrotg", "library"); + subst("cublasZscal", "rocblas_zscal", "library"); + subst("cublasZscal_64", "rocblas_zscal_64", "library"); + subst("cublasZscal_v2", "rocblas_zscal", "library"); + subst("cublasZscal_v2_64", "rocblas_zscal_64", "library"); + subst("cublasZswap", "rocblas_zswap", "library"); + subst("cublasZswap_64", "rocblas_zswap_64", "library"); + subst("cublasZswap_v2", "rocblas_zswap", "library"); + subst("cublasZswap_v2_64", "rocblas_zswap_64", "library"); + subst("cublasZsymm", "rocblas_zsymm", "library"); + subst("cublasZsymm_v2", 
"rocblas_zsymm", "library"); + subst("cublasZsymv", "rocblas_zsymv", "library"); + subst("cublasZsymv_v2", "rocblas_zsymv", "library"); + subst("cublasZsyr", "rocblas_zsyr", "library"); + subst("cublasZsyr2", "rocblas_zsyr2", "library"); + subst("cublasZsyr2_v2", "rocblas_zsyr2", "library"); + subst("cublasZsyr2k", "rocblas_zsyr2k", "library"); + subst("cublasZsyr2k_v2", "rocblas_zsyr2k", "library"); + subst("cublasZsyr_v2", "rocblas_zsyr", "library"); + subst("cublasZsyrk", "rocblas_zsyrk", "library"); + subst("cublasZsyrk_v2", "rocblas_zsyrk", "library"); + subst("cublasZsyrkx", "rocblas_zsyrkx", "library"); + subst("cublasZtbmv", "rocblas_ztbmv", "library"); + subst("cublasZtbmv_v2", "rocblas_ztbmv", "library"); + subst("cublasZtbsv", "rocblas_ztbsv", "library"); + subst("cublasZtbsv_v2", "rocblas_ztbsv", "library"); + subst("cublasZtpmv", "rocblas_ztpmv", "library"); + subst("cublasZtpmv_v2", "rocblas_ztpmv", "library"); + subst("cublasZtpsv", "rocblas_ztpsv", "library"); + subst("cublasZtpsv_v2", "rocblas_ztpsv", "library"); + subst("cublasZtrmm", "rocblas_ztrmm", "library"); + subst("cublasZtrmm_v2", "rocblas_ztrmm", "library"); + subst("cublasZtrmv", "rocblas_ztrmv", "library"); + subst("cublasZtrmv_v2", "rocblas_ztrmv", "library"); + subst("cublasZtrsm", "rocblas_ztrsm", "library"); + subst("cublasZtrsmBatched", "rocblas_ztrsm_batched", "library"); + subst("cublasZtrsm_v2", "rocblas_ztrsm", "library"); + subst("cublasZtrsv", "rocblas_ztrsv", "library"); + subst("cublasZtrsv_v2", "rocblas_ztrsv", "library"); + subst("cudnnActivationBackward", "miopenActivationBackward", "library"); + subst("cudnnActivationForward", "miopenActivationForward", "library"); + subst("cudnnBatchNormalizationBackward", "miopenBatchNormalizationBackward", "library"); + subst("cudnnBatchNormalizationForwardInference", "miopenBatchNormalizationForwardInference", "library"); + subst("cudnnBatchNormalizationForwardTraining", "miopenBatchNormalizationForwardTraining", "library"); + subst("cudnnCTCLoss", "miopenCTCLoss", "library"); + subst("cudnnConvolutionBackwardBias", "miopenConvolutionBackwardBias", "library"); + subst("cudnnConvolutionBackwardData", "miopenConvolutionBackwardData", "library"); + subst("cudnnConvolutionBiasActivationForward", "miopenConvolutionBiasActivationForward", "library"); + subst("cudnnConvolutionForward", "miopenConvolutionForward", "library"); + subst("cudnnCreate", "miopenCreate", "library"); + subst("cudnnCreateActivationDescriptor", "miopenCreateActivationDescriptor", "library"); + subst("cudnnCreateCTCLossDescriptor", "miopenCreateCTCLossDescriptor", "library"); + subst("cudnnCreateConvolutionDescriptor", "miopenCreateConvolutionDescriptor", "library"); + subst("cudnnCreateDropoutDescriptor", "miopenCreateDropoutDescriptor", "library"); + subst("cudnnCreateLRNDescriptor", "miopenCreateLRNDescriptor", "library"); + subst("cudnnCreatePoolingDescriptor", "miopenCreatePoolingDescriptor", "library"); + subst("cudnnCreateRNNDescriptor", "miopenCreateRNNDescriptor", "library"); + subst("cudnnCreateReduceTensorDescriptor", "miopenCreateReduceTensorDescriptor", "library"); + subst("cudnnCreateTensorDescriptor", "miopenCreateTensorDescriptor", "library"); + subst("cudnnDeriveBNTensorDescriptor", "miopenDeriveBNTensorDescriptor", "library"); + subst("cudnnDestroy", "miopenDestroy", "library"); + subst("cudnnDestroyActivationDescriptor", "miopenDestroyActivationDescriptor", "library"); + subst("cudnnDestroyCTCLossDescriptor", "miopenDestroyCTCLossDescriptor", "library"); + 
subst("cudnnDestroyConvolutionDescriptor", "miopenDestroyConvolutionDescriptor", "library"); + subst("cudnnDestroyDropoutDescriptor", "miopenDestroyDropoutDescriptor", "library"); + subst("cudnnDestroyLRNDescriptor", "miopenDestroyLRNDescriptor", "library"); + subst("cudnnDestroyPoolingDescriptor", "miopenDestroyPoolingDescriptor", "library"); + subst("cudnnDestroyRNNDescriptor", "miopenDestroyRNNDescriptor", "library"); + subst("cudnnDestroyReduceTensorDescriptor", "miopenDestroyReduceTensorDescriptor", "library"); + subst("cudnnDestroyTensorDescriptor", "miopenDestroyTensorDescriptor", "library"); + subst("cudnnDropoutBackward", "miopenDropoutBackward", "library"); + subst("cudnnDropoutForward", "miopenDropoutForward", "library"); + subst("cudnnDropoutGetReserveSpaceSize", "miopenDropoutGetReserveSpaceSize", "library"); + subst("cudnnDropoutGetStatesSize", "miopenDropoutGetStatesSize", "library"); + subst("cudnnFindConvolutionForwardAlgorithmEx", "miopenFindConvolutionForwardAlgorithm", "library"); + subst("cudnnGetCTCLossDescriptor", "miopenGetCTCLossDescriptor", "library"); + subst("cudnnGetCTCLossWorkspaceSize", "miopenGetCTCLossWorkspaceSize", "library"); + subst("cudnnGetConvolution2dForwardOutputDim", "miopenGetConvolutionForwardOutputDim", "library"); + subst("cudnnGetConvolutionBackwardDataWorkspaceSize", "miopenConvolutionBackwardDataGetWorkSpaceSize", "library"); + subst("cudnnGetConvolutionForwardWorkspaceSize", "miopenConvolutionForwardGetWorkSpaceSize", "library"); + subst("cudnnGetDropoutDescriptor", "miopenGetDropoutDescriptor", "library"); + subst("cudnnGetErrorString", "miopenGetErrorString", "library"); + subst("cudnnGetLRNDescriptor", "miopenGetLRNDescriptor", "library"); + subst("cudnnGetPooling2dDescriptor", "miopenGet2dPoolingDescriptor", "library"); + subst("cudnnGetPooling2dForwardOutputDim", "miopenGetPoolingForwardOutputDim", "library"); + subst("cudnnGetPoolingNdDescriptor", "miopenGetNdPoolingDescriptor", "library"); + subst("cudnnGetPoolingNdForwardOutputDim", "miopenGetPoolingNdForwardOutputDim", "library"); + subst("cudnnGetRNNDescriptor", "miopenGetRNNDescriptor_V2", "library"); + subst("cudnnGetRNNDescriptor_v6", "miopenGetRNNDescriptor_V2", "library"); + subst("cudnnGetRNNParamsSize", "miopenGetRNNParamsSize", "library"); + subst("cudnnGetRNNTrainingReserveSize", "miopenGetRNNTrainingReserveSize", "library"); + subst("cudnnGetRNNWorkspaceSize", "miopenGetRNNWorkspaceSize", "library"); + subst("cudnnGetReduceTensorDescriptor", "miopenGetReduceTensorDescriptor", "library"); + subst("cudnnGetReductionIndicesSize", "miopenGetReductionIndicesSize", "library"); + subst("cudnnGetReductionWorkspaceSize", "miopenGetReductionWorkspaceSize", "library"); + subst("cudnnGetStream", "miopenGetStream", "library"); + subst("cudnnGetTensor4dDescriptor", "miopenGet4dTensorDescriptor", "library"); + subst("cudnnRNNBackwardData", "miopenRNNBackwardData", "library"); + subst("cudnnRNNBackwardWeights", "miopenRNNBackwardWeights", "library"); + subst("cudnnRNNForwardInference", "miopenRNNForwardInference", "library"); + subst("cudnnRNNForwardTraining", "miopenRNNForwardTraining", "library"); + subst("cudnnReduceTensor", "miopenReduceTensor", "library"); + subst("cudnnRestoreDropoutDescriptor", "miopenRestoreDropoutDescriptor", "library"); + subst("cudnnScaleTensor", "miopenScaleTensor", "library"); + subst("cudnnSetCTCLossDescriptor", "miopenSetCTCLossDescriptor", "library"); + subst("cudnnSetConvolutionGroupCount", "miopenSetConvolutionGroupCount", "library"); + 
subst("cudnnSetDropoutDescriptor", "miopenSetDropoutDescriptor", "library"); + subst("cudnnSetLRNDescriptor", "miopenSetLRNDescriptor", "library"); + subst("cudnnSetPooling2dDescriptor", "miopenSet2dPoolingDescriptor", "library"); + subst("cudnnSetPoolingNdDescriptor", "miopenSetNdPoolingDescriptor", "library"); + subst("cudnnSetRNNDescriptor_v6", "miopenSetRNNDescriptor_V2", "library"); + subst("cudnnSetReduceTensorDescriptor", "miopenSetReduceTensorDescriptor", "library"); + subst("cudnnSetStream", "miopenSetStream", "library"); + subst("cudnnSetTensor", "miopenSetTensor", "library"); + subst("cudnnSetTensor4dDescriptorEx", "miopenSet4dTensorDescriptorEx", "library"); + subst("cudnnSoftmaxBackward", "miopenSoftmaxBackward_V2", "library"); + subst("cudnnSoftmaxForward", "miopenSoftmaxForward_V2", "library"); + subst("cudnnTransformTensor", "miopenTransformTensor", "library"); + subst("cusolverDnCpotrf", "rocsolver_cpotrf", "library"); + subst("cusolverDnCreate", "rocblas_create_handle", "library"); + subst("cusolverDnDestroy", "rocblas_destroy_handle", "library"); + subst("cusolverDnDpotrf", "rocsolver_dpotrf", "library"); + subst("cusolverDnGetStream", "rocblas_get_stream", "library"); + subst("cusolverDnSetStream", "rocblas_set_stream", "library"); + subst("cusolverDnSpotrf", "rocsolver_spotrf", "library"); + subst("cusolverDnZpotrf", "rocsolver_zpotrf", "library"); + subst("cusparseAxpby", "rocsparse_axpby", "library"); + subst("cusparseBlockedEllGet", "rocsparse_bell_get", "library"); + subst("cusparseCaxpyi", "rocsparse_caxpyi", "library"); + subst("cusparseCbsr2csr", "rocsparse_cbsr2csr", "library"); + subst("cusparseCbsric02", "rocsparse_cbsric0", "library"); + subst("cusparseCbsric02_analysis", "rocsparse_cbsric0_analysis", "library"); + subst("cusparseCbsric02_bufferSize", "rocsparse_cbsric0_buffer_size", "library"); + subst("cusparseCbsrilu02", "rocsparse_cbsrilu0", "library"); + subst("cusparseCbsrilu02_analysis", "rocsparse_cbsrilu0_analysis", "library"); + subst("cusparseCbsrilu02_bufferSize", "rocsparse_cbsrilu0_buffer_size", "library"); + subst("cusparseCbsrilu02_numericBoost", "rocsparse_dcbsrilu0_numeric_boost", "library"); + subst("cusparseCbsrmm", "rocsparse_cbsrmm", "library"); + subst("cusparseCbsrmv", "rocsparse_cbsrmv", "library"); + subst("cusparseCbsrsm2_analysis", "rocsparse_cbsrsm_analysis", "library"); + subst("cusparseCbsrsm2_bufferSize", "rocsparse_cbsrsm_buffer_size", "library"); + subst("cusparseCbsrsm2_solve", "rocsparse_cbsrsm_solve", "library"); + subst("cusparseCbsrsv2_analysis", "rocsparse_cbsrsv_analysis", "library"); + subst("cusparseCbsrsv2_bufferSize", "rocsparse_cbsrsv_buffer_size", "library"); + subst("cusparseCbsrsv2_bufferSizeExt", "rocsparse_cbsrsv_buffer_size", "library"); + subst("cusparseCbsrsv2_solve", "rocsparse_cbsrsv_solve", "library"); + subst("cusparseCbsrxmv", "rocsparse_cbsrxmv", "library"); + subst("cusparseCcsc2dense", "rocsparse_ccsc2dense", "library"); + subst("cusparseCcsr2bsr", "rocsparse_ccsr2bsr", "library"); + subst("cusparseCcsr2csr_compress", "rocsparse_ccsr2csr_compress", "library"); + subst("cusparseCcsr2dense", "rocsparse_ccsr2dense", "library"); + subst("cusparseCcsr2gebsr", "rocsparse_ccsr2gebsr", "library"); + subst("cusparseCcsr2gebsr_bufferSize", "rocsparse_ccsr2gebsr_buffer_size", "library"); + subst("cusparseCcsr2hyb", "rocsparse_ccsr2hyb", "library"); + subst("cusparseCcsrcolor", "rocsparse_ccsrcolor", "library"); + subst("cusparseCcsrgeam", "rocsparse_ccsrgeam", "library"); + subst("cusparseCcsrgeam2", 
"rocsparse_ccsrgeam", "library"); + subst("cusparseCcsrgemm2", "rocsparse_ccsrgemm", "library"); + subst("cusparseCcsrgemm2_bufferSizeExt", "rocsparse_ccsrgemm_buffer_size", "library"); + subst("cusparseCcsric02", "rocsparse_ccsric0", "library"); + subst("cusparseCcsric02_analysis", "rocsparse_ccsric0_analysis", "library"); + subst("cusparseCcsric02_bufferSize", "rocsparse_ccsric0_buffer_size", "library"); + subst("cusparseCcsric02_bufferSizeExt", "rocsparse_ccsric0_buffer_size", "library"); + subst("cusparseCcsrilu02", "rocsparse_ccsrilu0", "library"); + subst("cusparseCcsrilu02_analysis", "rocsparse_ccsrilu0_analysis", "library"); + subst("cusparseCcsrilu02_bufferSize", "rocsparse_ccsrilu0_buffer_size", "library"); + subst("cusparseCcsrilu02_bufferSizeExt", "rocsparse_ccsrilu0_buffer_size", "library"); + subst("cusparseCcsrilu02_numericBoost", "rocsparse_dccsrilu0_numeric_boost", "library"); + subst("cusparseCcsrmm", "rocsparse_ccsrmm", "library"); + subst("cusparseCcsrmm2", "rocsparse_ccsrmm", "library"); + subst("cusparseCcsrmv", "rocsparse_ccsrmv", "library"); + subst("cusparseCcsrsm2_analysis", "rocsparse_ccsrsm_analysis", "library"); + subst("cusparseCcsrsm2_bufferSizeExt", "rocsparse_ccsrsm_buffer_size", "library"); + subst("cusparseCcsrsm2_solve", "rocsparse_ccsrsm_solve", "library"); + subst("cusparseCcsrsv2_analysis", "rocsparse_ccsrsv_analysis", "library"); + subst("cusparseCcsrsv2_bufferSize", "rocsparse_ccsrsv_buffer_size", "library"); + subst("cusparseCcsrsv2_bufferSizeExt", "rocsparse_ccsrsv_buffer_size", "library"); + subst("cusparseCcsrsv2_solve", "rocsparse_ccsrsv_solve", "library"); + subst("cusparseCdense2csc", "rocsparse_cdense2csc", "library"); + subst("cusparseCdense2csr", "rocsparse_cdense2csr", "library"); + subst("cusparseCdotci", "rocsparse_cdotci", "library"); + subst("cusparseCdoti", "rocsparse_cdoti", "library"); + subst("cusparseCgebsr2csr", "rocsparse_cgebsr2csr", "library"); + subst("cusparseCgebsr2gebsc", "rocsparse_cgebsr2gebsc", "library"); + subst("cusparseCgebsr2gebsc_bufferSize", "rocsparse_cgebsr2gebsc_buffer_size", "library"); + subst("cusparseCgebsr2gebsr", "rocsparse_cgebsr2gebsr", "library"); + subst("cusparseCgebsr2gebsr_bufferSize", "rocsparse_cgebsr2gebsr_buffer_size", "library"); + subst("cusparseCgemvi", "rocsparse_cgemvi", "library"); + subst("cusparseCgemvi_bufferSize", "rocsparse_cgemvi_buffer_size", "library"); + subst("cusparseCgpsvInterleavedBatch", "rocsparse_cgpsv_interleaved_batch", "library"); + subst("cusparseCgpsvInterleavedBatch_bufferSizeExt", "rocsparse_cgpsv_interleaved_batch_buffer_size", "library"); + subst("cusparseCgthr", "rocsparse_cgthr", "library"); + subst("cusparseCgthrz", "rocsparse_cgthrz", "library"); + subst("cusparseCgtsv2", "rocsparse_cgtsv", "library"); + subst("cusparseCgtsv2StridedBatch", "rocsparse_cgtsv_no_pivot_strided_batch", "library"); + subst("cusparseCgtsv2StridedBatch_bufferSizeExt", "rocsparse_cgtsv_no_pivot_strided_batch_buffer_size", "library"); + subst("cusparseCgtsv2_bufferSizeExt", "rocsparse_cgtsv_buffer_size", "library"); + subst("cusparseCgtsv2_nopivot", "rocsparse_cgtsv_no_pivot", "library"); + subst("cusparseCgtsv2_nopivot_bufferSizeExt", "rocsparse_cgtsv_no_pivot_buffer_size", "library"); + subst("cusparseCgtsvInterleavedBatch", "rocsparse_cgtsv_interleaved_batch", "library"); + subst("cusparseCgtsvInterleavedBatch_bufferSizeExt", "rocsparse_cgtsv_interleaved_batch_buffer_size", "library"); + subst("cusparseChybmv", "rocsparse_chybmv", "library"); + subst("cusparseCnnz", 
"rocsparse_cnnz", "library"); + subst("cusparseCnnz_compress", "rocsparse_cnnz_compress", "library"); + subst("cusparseConstBlockedEllGet", "rocsparse_const_bell_get", "library"); + subst("cusparseConstCooGet", "rocsparse_const_coo_get", "library"); + subst("cusparseConstCscGet", "rocsparse_const_csc_get", "library"); + subst("cusparseConstCsrGet", "rocsparse_const_csr_get", "library"); + subst("cusparseConstDnMatGet", "rocsparse_const_dnmat_get", "library"); + subst("cusparseConstDnMatGetValues", "rocsparse_const_dnmat_get_values", "library"); + subst("cusparseConstDnVecGet", "rocsparse_const_dnvec_get", "library"); + subst("cusparseConstDnVecGetValues", "rocsparse_const_dnvec_get_values", "library"); + subst("cusparseConstSpMatGetValues", "rocsparse_const_spmat_get_values", "library"); + subst("cusparseConstSpVecGet", "rocsparse_const_spvec_get", "library"); + subst("cusparseConstSpVecGetValues", "rocsparse_const_spvec_get_values", "library"); + subst("cusparseCooAoSGet", "rocsparse_coo_aos_get", "library"); + subst("cusparseCooGet", "rocsparse_coo_get", "library"); + subst("cusparseCooSetPointers", "rocsparse_coo_set_pointers", "library"); + subst("cusparseCooSetStridedBatch", "rocsparse_coo_set_strided_batch", "library"); + subst("cusparseCopyMatDescr", "rocsparse_copy_mat_descr", "library"); + subst("cusparseCreate", "rocsparse_create_handle", "library"); + subst("cusparseCreateBlockedEll", "rocsparse_create_bell_descr", "library"); + subst("cusparseCreateBsric02Info", "rocsparse_create_mat_info", "library"); + subst("cusparseCreateBsrilu02Info", "rocsparse_create_mat_info", "library"); + subst("cusparseCreateBsrsm2Info", "rocsparse_create_mat_info", "library"); + subst("cusparseCreateBsrsv2Info", "rocsparse_create_mat_info", "library"); + subst("cusparseCreateColorInfo", "rocsparse_create_color_info", "library"); + subst("cusparseCreateConstBlockedEll", "rocsparse_create_const_bell_descr", "library"); + subst("cusparseCreateConstCoo", "rocsparse_create_const_coo_descr", "library"); + subst("cusparseCreateConstCsc", "rocsparse_create_const_csc_descr", "library"); + subst("cusparseCreateConstCsr", "rocsparse_create_const_csr_descr", "library"); + subst("cusparseCreateConstDnMat", "rocsparse_create_const_dnmat_descr", "library"); + subst("cusparseCreateConstDnVec", "rocsparse_create_const_dnvec_descr", "library"); + subst("cusparseCreateConstSpVec", "rocsparse_create_const_spvec_descr", "library"); + subst("cusparseCreateCoo", "rocsparse_create_coo_descr", "library"); + subst("cusparseCreateCooAoS", "rocsparse_create_coo_aos_descr", "library"); + subst("cusparseCreateCsc", "rocsparse_create_csc_descr", "library"); + subst("cusparseCreateCsr", "rocsparse_create_csr_descr", "library"); + subst("cusparseCreateCsrgemm2Info", "rocsparse_create_mat_info", "library"); + subst("cusparseCreateCsric02Info", "rocsparse_create_mat_info", "library"); + subst("cusparseCreateCsrilu02Info", "rocsparse_create_mat_info", "library"); + subst("cusparseCreateCsrsm2Info", "rocsparse_create_mat_info", "library"); + subst("cusparseCreateCsrsv2Info", "rocsparse_create_mat_info", "library"); + subst("cusparseCreateDnMat", "rocsparse_create_dnmat_descr", "library"); + subst("cusparseCreateDnVec", "rocsparse_create_dnvec_descr", "library"); + subst("cusparseCreateHybMat", "rocsparse_create_hyb_mat", "library"); + subst("cusparseCreateIdentityPermutation", "rocsparse_create_identity_permutation", "library"); + subst("cusparseCreateMatDescr", "rocsparse_create_mat_descr", "library"); + 
subst("cusparseCreatePruneInfo", "rocsparse_create_mat_info", "library"); + subst("cusparseCreateSpVec", "rocsparse_create_spvec_descr", "library"); + subst("cusparseCscGet", "rocsparse_csc_get", "library"); + subst("cusparseCscSetPointers", "rocsparse_csc_set_pointers", "library"); + subst("cusparseCsctr", "rocsparse_csctr", "library"); + subst("cusparseCsr2cscEx2_bufferSize", "rocsparse_csr2csc_buffer_size", "library"); + subst("cusparseCsrGet", "rocsparse_csr_get", "library"); + subst("cusparseCsrSetPointers", "rocsparse_csr_set_pointers", "library"); + subst("cusparseCsrSetStridedBatch", "rocsparse_csr_set_strided_batch", "library"); + subst("cusparseDaxpyi", "rocsparse_daxpyi", "library"); + subst("cusparseDbsr2csr", "rocsparse_dbsr2csr", "library"); + subst("cusparseDbsric02", "rocsparse_dbsric0", "library"); + subst("cusparseDbsric02_analysis", "rocsparse_dbsric0_analysis", "library"); + subst("cusparseDbsric02_bufferSize", "rocsparse_dbsric0_buffer_size", "library"); + subst("cusparseDbsrilu02", "rocsparse_dbsrilu0", "library"); + subst("cusparseDbsrilu02_analysis", "rocsparse_dbsrilu0_analysis", "library"); + subst("cusparseDbsrilu02_bufferSize", "rocsparse_dbsrilu0_buffer_size", "library"); + subst("cusparseDbsrilu02_numericBoost", "rocsparse_dbsrilu0_numeric_boost", "library"); + subst("cusparseDbsrmm", "rocsparse_dbsrmm", "library"); + subst("cusparseDbsrmv", "rocsparse_dbsrmv", "library"); + subst("cusparseDbsrsm2_analysis", "rocsparse_dbsrsm_analysis", "library"); + subst("cusparseDbsrsm2_bufferSize", "rocsparse_dbsrsm_buffer_size", "library"); + subst("cusparseDbsrsm2_solve", "rocsparse_dbsrsm_solve", "library"); + subst("cusparseDbsrsv2_analysis", "rocsparse_dbsrsv_analysis", "library"); + subst("cusparseDbsrsv2_bufferSize", "rocsparse_dbsrsv_buffer_size", "library"); + subst("cusparseDbsrsv2_bufferSizeExt", "rocsparse_dbsrsv_buffer_size", "library"); + subst("cusparseDbsrsv2_solve", "rocsparse_dbsrsv_solve", "library"); + subst("cusparseDbsrxmv", "rocsparse_dbsrxmv", "library"); + subst("cusparseDcsc2dense", "rocsparse_dcsc2dense", "library"); + subst("cusparseDcsr2bsr", "rocsparse_dcsr2bsr", "library"); + subst("cusparseDcsr2csr_compress", "rocsparse_dcsr2csr_compress", "library"); + subst("cusparseDcsr2dense", "rocsparse_dcsr2dense", "library"); + subst("cusparseDcsr2gebsr", "rocsparse_dcsr2gebsr", "library"); + subst("cusparseDcsr2gebsr_bufferSize", "rocsparse_dcsr2gebsr_buffer_size", "library"); + subst("cusparseDcsr2hyb", "rocsparse_dcsr2hyb", "library"); + subst("cusparseDcsrcolor", "rocsparse_dcsrcolor", "library"); + subst("cusparseDcsrgeam", "rocsparse_dcsrgeam", "library"); + subst("cusparseDcsrgeam2", "rocsparse_dcsrgeam", "library"); + subst("cusparseDcsrgemm2", "rocsparse_dcsrgemm", "library"); + subst("cusparseDcsrgemm2_bufferSizeExt", "rocsparse_dcsrgemm_buffer_size", "library"); + subst("cusparseDcsric02", "rocsparse_dcsric0", "library"); + subst("cusparseDcsric02_analysis", "rocsparse_dcsric0_analysis", "library"); + subst("cusparseDcsric02_bufferSize", "rocsparse_dcsric0_buffer_size", "library"); + subst("cusparseDcsric02_bufferSizeExt", "rocsparse_dcsric0_buffer_size", "library"); + subst("cusparseDcsrilu02", "rocsparse_dcsrilu0", "library"); + subst("cusparseDcsrilu02_analysis", "rocsparse_dcsrilu0_analysis", "library"); + subst("cusparseDcsrilu02_bufferSize", "rocsparse_dcsrilu0_buffer_size", "library"); + subst("cusparseDcsrilu02_bufferSizeExt", "rocsparse_dcsrilu0_buffer_size", "library"); + subst("cusparseDcsrilu02_numericBoost", 
"rocsparse_dcsrilu0_numeric_boost", "library"); + subst("cusparseDcsrmm", "rocsparse_dcsrmm", "library"); + subst("cusparseDcsrmm2", "rocsparse_dcsrmm", "library"); + subst("cusparseDcsrmv", "rocsparse_dcsrmv", "library"); + subst("cusparseDcsrsm2_analysis", "rocsparse_dcsrsm_analysis", "library"); + subst("cusparseDcsrsm2_bufferSizeExt", "rocsparse_dcsrsm_buffer_size", "library"); + subst("cusparseDcsrsm2_solve", "rocsparse_dcsrsm_solve", "library"); + subst("cusparseDcsrsv2_analysis", "rocsparse_dcsrsv_analysis", "library"); + subst("cusparseDcsrsv2_bufferSize", "rocsparse_dcsrsv_buffer_size", "library"); + subst("cusparseDcsrsv2_bufferSizeExt", "rocsparse_dcsrsv_buffer_size", "library"); + subst("cusparseDcsrsv2_solve", "rocsparse_dcsrsv_solve", "library"); + subst("cusparseDdense2csc", "rocsparse_ddense2csc", "library"); + subst("cusparseDdense2csr", "rocsparse_ddense2csr", "library"); + subst("cusparseDdoti", "rocsparse_ddoti", "library"); + subst("cusparseDenseToSparse_analysis", "rocsparse_dense_to_sparse", "library"); + subst("cusparseDenseToSparse_bufferSize", "rocsparse_dense_to_sparse", "library"); + subst("cusparseDestroy", "rocsparse_destroy_handle", "library"); + subst("cusparseDestroyBsric02Info", "rocsparse_destroy_mat_info", "library"); + subst("cusparseDestroyBsrilu02Info", "rocsparse_destroy_mat_info", "library"); + subst("cusparseDestroyBsrsm2Info", "rocsparse_destroy_mat_info", "library"); + subst("cusparseDestroyBsrsv2Info", "rocsparse_destroy_mat_info", "library"); + subst("cusparseDestroyColorInfo", "rocsparse_destroy_color_info", "library"); + subst("cusparseDestroyCsrgemm2Info", "rocsparse_destroy_mat_info", "library"); + subst("cusparseDestroyCsric02Info", "rocsparse_destroy_mat_info", "library"); + subst("cusparseDestroyCsrilu02Info", "rocsparse_destroy_mat_info", "library"); + subst("cusparseDestroyCsrsm2Info", "rocsparse_destroy_mat_info", "library"); + subst("cusparseDestroyCsrsv2Info", "rocsparse_destroy_mat_info", "library"); + subst("cusparseDestroyDnMat", "rocsparse_destroy_dnmat_descr", "library"); + subst("cusparseDestroyDnVec", "rocsparse_destroy_dnvec_descr", "library"); + subst("cusparseDestroyHybMat", "rocsparse_destroy_hyb_mat", "library"); + subst("cusparseDestroyMatDescr", "rocsparse_destroy_mat_descr", "library"); + subst("cusparseDestroyPruneInfo", "rocsparse_destroy_mat_info", "library"); + subst("cusparseDestroySpMat", "rocsparse_destroy_spmat_descr", "library"); + subst("cusparseDestroySpVec", "rocsparse_destroy_spvec_descr", "library"); + subst("cusparseDgebsr2csr", "rocsparse_dgebsr2csr", "library"); + subst("cusparseDgebsr2gebsc", "rocsparse_dgebsr2gebsc", "library"); + subst("cusparseDgebsr2gebsc_bufferSize", "rocsparse_dgebsr2gebsc_buffer_size", "library"); + subst("cusparseDgebsr2gebsr", "rocsparse_dgebsr2gebsr", "library"); + subst("cusparseDgebsr2gebsr_bufferSize", "rocsparse_dgebsr2gebsr_buffer_size", "library"); + subst("cusparseDgemvi", "rocsparse_dgemvi", "library"); + subst("cusparseDgemvi_bufferSize", "rocsparse_dgemvi_buffer_size", "library"); + subst("cusparseDgpsvInterleavedBatch", "rocsparse_dgpsv_interleaved_batch", "library"); + subst("cusparseDgpsvInterleavedBatch_bufferSizeExt", "rocsparse_dgpsv_interleaved_batch_buffer_size", "library"); + subst("cusparseDgthr", "rocsparse_dgthr", "library"); + subst("cusparseDgthrz", "rocsparse_dgthrz", "library"); + subst("cusparseDgtsv2", "rocsparse_dgtsv", "library"); + subst("cusparseDgtsv2StridedBatch", "rocsparse_dgtsv_no_pivot_strided_batch", "library"); + 
subst("cusparseDgtsv2StridedBatch_bufferSizeExt", "rocsparse_dgtsv_no_pivot_strided_batch_buffer_size", "library"); + subst("cusparseDgtsv2_bufferSizeExt", "rocsparse_dgtsv_buffer_size", "library"); + subst("cusparseDgtsv2_nopivot", "rocsparse_dgtsv_no_pivot", "library"); + subst("cusparseDgtsv2_nopivot_bufferSizeExt", "rocsparse_dgtsv_no_pivot_buffer_size", "library"); + subst("cusparseDgtsvInterleavedBatch", "rocsparse_dgtsv_interleaved_batch", "library"); + subst("cusparseDgtsvInterleavedBatch_bufferSizeExt", "rocsparse_dgtsv_interleaved_batch_buffer_size", "library"); + subst("cusparseDhybmv", "rocsparse_dhybmv", "library"); + subst("cusparseDnMatGet", "rocsparse_dnmat_get", "library"); + subst("cusparseDnMatGetStridedBatch", "rocsparse_dnmat_get_strided_batch", "library"); + subst("cusparseDnMatGetValues", "rocsparse_dnmat_get_values", "library"); + subst("cusparseDnMatSetStridedBatch", "rocsparse_dnmat_set_strided_batch", "library"); + subst("cusparseDnMatSetValues", "rocsparse_dnmat_set_values", "library"); + subst("cusparseDnVecGet", "rocsparse_dnvec_get", "library"); + subst("cusparseDnVecGetValues", "rocsparse_dnvec_get_values", "library"); + subst("cusparseDnVecSetValues", "rocsparse_dnvec_set_values", "library"); + subst("cusparseDnnz", "rocsparse_dnnz", "library"); + subst("cusparseDnnz_compress", "rocsparse_dnnz_compress", "library"); + subst("cusparseDpruneCsr2csr", "rocsparse_dprune_csr2csr", "library"); + subst("cusparseDpruneCsr2csrByPercentage", "rocsparse_dprune_csr2csr_by_percentage", "library"); + subst("cusparseDpruneCsr2csrByPercentage_bufferSizeExt", "rocsparse_dprune_csr2csr_by_percentage_buffer_size", "library"); + subst("cusparseDpruneCsr2csrNnz", "rocsparse_dprune_csr2csr_nnz", "library"); + subst("cusparseDpruneCsr2csrNnzByPercentage", "rocsparse_dprune_csr2csr_nnz_by_percentage", "library"); + subst("cusparseDpruneCsr2csr_bufferSizeExt", "rocsparse_dprune_csr2csr_buffer_size", "library"); + subst("cusparseDpruneDense2csr", "rocsparse_dprune_dense2csr", "library"); + subst("cusparseDpruneDense2csrByPercentage", "rocsparse_dprune_dense2csr_by_percentage", "library"); + subst("cusparseDpruneDense2csrByPercentage_bufferSizeExt", "rocsparse_dprune_dense2csr_by_percentage_buffer_size", "library"); + subst("cusparseDpruneDense2csrNnz", "rocsparse_dprune_dense2csr_nnz", "library"); + subst("cusparseDpruneDense2csrNnzByPercentage", "rocsparse_dprune_dense2csr_nnz_by_percentage", "library"); + subst("cusparseDpruneDense2csr_bufferSizeExt", "rocsparse_dprune_dense2csr_buffer_size", "library"); + subst("cusparseDroti", "rocsparse_droti", "library"); + subst("cusparseDsctr", "rocsparse_dsctr", "library"); + subst("cusparseGather", "rocsparse_gather", "library"); + subst("cusparseGetErrorName", "rocsparse_get_status_name", "library"); + subst("cusparseGetErrorString", "rocsparse_get_status_description", "library"); + subst("cusparseGetMatDiagType", "rocsparse_get_mat_diag_type", "library"); + subst("cusparseGetMatFillMode", "rocsparse_get_mat_fill_mode", "library"); + subst("cusparseGetMatIndexBase", "rocsparse_get_mat_index_base", "library"); + subst("cusparseGetMatType", "rocsparse_get_mat_type", "library"); + subst("cusparseGetPointerMode", "rocsparse_get_pointer_mode", "library"); + subst("cusparseGetStream", "rocsparse_get_stream", "library"); + subst("cusparseGetVersion", "rocsparse_get_version", "library"); + subst("cusparseRot", "rocsparse_rot", "library"); + subst("cusparseSDDMM", "rocsparse_sddmm", "library"); + subst("cusparseSDDMM_bufferSize", 
"rocsparse_sddmm_buffer_size", "library"); + subst("cusparseSDDMM_preprocess", "rocsparse_sddmm_preprocess", "library"); + subst("cusparseSaxpyi", "rocsparse_saxpyi", "library"); + subst("cusparseSbsr2csr", "rocsparse_sbsr2csr", "library"); + subst("cusparseSbsric02", "rocsparse_sbsric0", "library"); + subst("cusparseSbsric02_analysis", "rocsparse_sbsric0_analysis", "library"); + subst("cusparseSbsric02_bufferSize", "rocsparse_sbsric0_buffer_size", "library"); + subst("cusparseSbsrilu02", "rocsparse_sbsrilu0", "library"); + subst("cusparseSbsrilu02_analysis", "rocsparse_sbsrilu0_analysis", "library"); + subst("cusparseSbsrilu02_bufferSize", "rocsparse_sbsrilu0_buffer_size", "library"); + subst("cusparseSbsrilu02_numericBoost", "rocsparse_dsbsrilu0_numeric_boost", "library"); + subst("cusparseSbsrmm", "rocsparse_sbsrmm", "library"); + subst("cusparseSbsrmv", "rocsparse_sbsrmv", "library"); + subst("cusparseSbsrsm2_analysis", "rocsparse_sbsrsm_analysis", "library"); + subst("cusparseSbsrsm2_bufferSize", "rocsparse_sbsrsm_buffer_size", "library"); + subst("cusparseSbsrsm2_solve", "rocsparse_sbsrsm_solve", "library"); + subst("cusparseSbsrsv2_analysis", "rocsparse_sbsrsv_analysis", "library"); + subst("cusparseSbsrsv2_bufferSize", "rocsparse_sbsrsv_buffer_size", "library"); + subst("cusparseSbsrsv2_bufferSizeExt", "rocsparse_sbsrsv_buffer_size", "library"); + subst("cusparseSbsrsv2_solve", "rocsparse_sbsrsv_solve", "library"); + subst("cusparseSbsrxmv", "rocsparse_sbsrxmv", "library"); + subst("cusparseScatter", "rocsparse_scatter", "library"); + subst("cusparseScsc2dense", "rocsparse_scsc2dense", "library"); + subst("cusparseScsr2bsr", "rocsparse_scsr2bsr", "library"); + subst("cusparseScsr2csr_compress", "rocsparse_scsr2csr_compress", "library"); + subst("cusparseScsr2dense", "rocsparse_scsr2dense", "library"); + subst("cusparseScsr2gebsr", "rocsparse_scsr2gebsr", "library"); + subst("cusparseScsr2gebsr_bufferSize", "rocsparse_scsr2gebsr_buffer_size", "library"); + subst("cusparseScsr2hyb", "rocsparse_scsr2hyb", "library"); + subst("cusparseScsrcolor", "rocsparse_scsrcolor", "library"); + subst("cusparseScsrgeam", "rocsparse_scsrgeam", "library"); + subst("cusparseScsrgeam2", "rocsparse_scsrgeam", "library"); + subst("cusparseScsrgemm2", "rocsparse_scsrgemm", "library"); + subst("cusparseScsrgemm2_bufferSizeExt", "rocsparse_scsrgemm_buffer_size", "library"); + subst("cusparseScsric02", "rocsparse_scsric0", "library"); + subst("cusparseScsric02_analysis", "rocsparse_scsric0_analysis", "library"); + subst("cusparseScsric02_bufferSize", "rocsparse_scsric0_buffer_size", "library"); + subst("cusparseScsric02_bufferSizeExt", "rocsparse_scsric0_buffer_size", "library"); + subst("cusparseScsrilu02", "rocsparse_scsrilu0", "library"); + subst("cusparseScsrilu02_analysis", "rocsparse_scsrilu0_analysis", "library"); + subst("cusparseScsrilu02_bufferSize", "rocsparse_scsrilu0_buffer_size", "library"); + subst("cusparseScsrilu02_bufferSizeExt", "rocsparse_scsrilu0_buffer_size", "library"); + subst("cusparseScsrilu02_numericBoost", "rocsparse_dscsrilu0_numeric_boost", "library"); + subst("cusparseScsrmm", "rocsparse_scsrmm", "library"); + subst("cusparseScsrmm2", "rocsparse_scsrmm", "library"); + subst("cusparseScsrmv", "rocsparse_scsrmv", "library"); + subst("cusparseScsrsm2_analysis", "rocsparse_scsrsm_analysis", "library"); + subst("cusparseScsrsm2_bufferSizeExt", "rocsparse_scsrsm_buffer_size", "library"); + subst("cusparseScsrsm2_solve", "rocsparse_scsrsm_solve", "library"); + 
subst("cusparseScsrsv2_analysis", "rocsparse_scsrsv_analysis", "library"); + subst("cusparseScsrsv2_bufferSize", "rocsparse_scsrsv_buffer_size", "library"); + subst("cusparseScsrsv2_bufferSizeExt", "rocsparse_scsrsv_buffer_size", "library"); + subst("cusparseScsrsv2_solve", "rocsparse_scsrsv_solve", "library"); + subst("cusparseSdense2csc", "rocsparse_sdense2csc", "library"); + subst("cusparseSdense2csr", "rocsparse_sdense2csr", "library"); + subst("cusparseSdoti", "rocsparse_sdoti", "library"); + subst("cusparseSetMatDiagType", "rocsparse_set_mat_diag_type", "library"); + subst("cusparseSetMatFillMode", "rocsparse_set_mat_fill_mode", "library"); + subst("cusparseSetMatIndexBase", "rocsparse_set_mat_index_base", "library"); + subst("cusparseSetMatType", "rocsparse_set_mat_type", "library"); + subst("cusparseSetPointerMode", "rocsparse_set_pointer_mode", "library"); + subst("cusparseSetStream", "rocsparse_set_stream", "library"); + subst("cusparseSgebsr2csr", "rocsparse_sgebsr2csr", "library"); + subst("cusparseSgebsr2gebsc", "rocsparse_sgebsr2gebsc", "library"); + subst("cusparseSgebsr2gebsc_bufferSize", "rocsparse_sgebsr2gebsc_buffer_size", "library"); + subst("cusparseSgebsr2gebsr", "rocsparse_sgebsr2gebsr", "library"); + subst("cusparseSgebsr2gebsr_bufferSize", "rocsparse_sgebsr2gebsr_buffer_size", "library"); + subst("cusparseSgemvi", "rocsparse_sgemvi", "library"); + subst("cusparseSgemvi_bufferSize", "rocsparse_sgemvi_buffer_size", "library"); + subst("cusparseSgpsvInterleavedBatch", "rocsparse_sgpsv_interleaved_batch", "library"); + subst("cusparseSgpsvInterleavedBatch_bufferSizeExt", "rocsparse_sgpsv_interleaved_batch_buffer_size", "library"); + subst("cusparseSgthr", "rocsparse_sgthr", "library"); + subst("cusparseSgthrz", "rocsparse_sgthrz", "library"); + subst("cusparseSgtsv2", "rocsparse_sgtsv", "library"); + subst("cusparseSgtsv2StridedBatch", "rocsparse_sgtsv_no_pivot_strided_batch", "library"); + subst("cusparseSgtsv2StridedBatch_bufferSizeExt", "rocsparse_sgtsv_no_pivot_strided_batch_buffer_size", "library"); + subst("cusparseSgtsv2_bufferSizeExt", "rocsparse_sgtsv_buffer_size", "library"); + subst("cusparseSgtsv2_nopivot", "rocsparse_sgtsv_no_pivot", "library"); + subst("cusparseSgtsv2_nopivot_bufferSizeExt", "rocsparse_sgtsv_no_pivot_buffer_size", "library"); + subst("cusparseSgtsvInterleavedBatch", "rocsparse_sgtsv_interleaved_batch", "library"); + subst("cusparseSgtsvInterleavedBatch_bufferSizeExt", "rocsparse_sgtsv_interleaved_batch_buffer_size", "library"); + subst("cusparseShybmv", "rocsparse_shybmv", "library"); + subst("cusparseSnnz", "rocsparse_snnz", "library"); + subst("cusparseSnnz_compress", "rocsparse_snnz_compress", "library"); + subst("cusparseSpMM", "rocsparse_spmm", "library"); + subst("cusparseSpMM_bufferSize", "rocsparse_spmm", "library"); + subst("cusparseSpMM_preprocess", "rocsparse_spmm", "library"); + subst("cusparseSpMV", "rocsparse_spmv", "library"); + subst("cusparseSpMV_bufferSize", "rocsparse_spmv", "library"); + subst("cusparseSpMatGetAttribute", "rocsparse_spmat_get_attribute", "library"); + subst("cusparseSpMatGetFormat", "rocsparse_spmat_get_format", "library"); + subst("cusparseSpMatGetIndexBase", "rocsparse_spmat_get_index_base", "library"); + subst("cusparseSpMatGetSize", "rocsparse_spmat_get_size", "library"); + subst("cusparseSpMatGetStridedBatch", "rocsparse_spmat_get_strided_batch", "library"); + subst("cusparseSpMatGetValues", "rocsparse_spmat_get_values", "library"); + subst("cusparseSpMatSetAttribute", 
"rocsparse_spmat_set_attribute", "library"); + subst("cusparseSpMatSetStridedBatch", "rocsparse_spmat_set_strided_batch", "library"); + subst("cusparseSpMatSetValues", "rocsparse_spmat_set_values", "library"); + subst("cusparseSpSM_analysis", "rocsparse_spsm", "library"); + subst("cusparseSpSM_solve", "rocsparse_spsm", "library"); + subst("cusparseSpSV_bufferSize", "rocsparse_spsv", "library"); + subst("cusparseSpVV", "rocsparse_spvv", "library"); + subst("cusparseSpVV_bufferSize", "rocsparse_spvv", "library"); + subst("cusparseSpVecGet", "rocsparse_spvec_get", "library"); + subst("cusparseSpVecGetIndexBase", "rocsparse_spvec_get_index_base", "library"); + subst("cusparseSpVecGetValues", "rocsparse_spvec_get_values", "library"); + subst("cusparseSpVecSetValues", "rocsparse_spvec_set_values", "library"); + subst("cusparseSparseToDense", "rocsparse_sparse_to_dense", "library"); + subst("cusparseSparseToDense_bufferSize", "rocsparse_sparse_to_dense", "library"); + subst("cusparseSpruneCsr2csr", "rocsparse_sprune_csr2csr", "library"); + subst("cusparseSpruneCsr2csrByPercentage", "rocsparse_sprune_csr2csr_by_percentage", "library"); + subst("cusparseSpruneCsr2csrByPercentage_bufferSizeExt", "rocsparse_sprune_csr2csr_by_percentage_buffer_size", "library"); + subst("cusparseSpruneCsr2csrNnz", "rocsparse_sprune_csr2csr_nnz", "library"); + subst("cusparseSpruneCsr2csrNnzByPercentage", "rocsparse_sprune_csr2csr_nnz_by_percentage", "library"); + subst("cusparseSpruneCsr2csr_bufferSizeExt", "rocsparse_sprune_csr2csr_buffer_size", "library"); + subst("cusparseSpruneDense2csr", "rocsparse_sprune_dense2csr", "library"); + subst("cusparseSpruneDense2csrByPercentage", "rocsparse_sprune_dense2csr_by_percentage", "library"); + subst("cusparseSpruneDense2csrByPercentage_bufferSizeExt", "rocsparse_sprune_dense2csr_by_percentage_buffer_size", "library"); + subst("cusparseSpruneDense2csrNnz", "rocsparse_sprune_dense2csr_nnz", "library"); + subst("cusparseSpruneDense2csrNnzByPercentage", "rocsparse_sprune_dense2csr_nnz_by_percentage", "library"); + subst("cusparseSpruneDense2csr_bufferSizeExt", "rocsparse_sprune_dense2csr_buffer_size", "library"); + subst("cusparseSroti", "rocsparse_sroti", "library"); + subst("cusparseSsctr", "rocsparse_ssctr", "library"); + subst("cusparseXbsric02_zeroPivot", "rocsparse_bsric0_zero_pivot", "library"); + subst("cusparseXbsrilu02_zeroPivot", "rocsparse_bsrilu0_zero_pivot", "library"); + subst("cusparseXbsrsm2_zeroPivot", "rocsparse_bsrsm_zero_pivot", "library"); + subst("cusparseXbsrsv2_zeroPivot", "rocsparse_bsrsv_zero_pivot", "library"); + subst("cusparseXcoo2csr", "rocsparse_coo2csr", "library"); + subst("cusparseXcoosortByColumn", "rocsparse_coosort_by_column", "library"); + subst("cusparseXcoosortByRow", "rocsparse_coosort_by_row", "library"); + subst("cusparseXcoosort_bufferSizeExt", "rocsparse_coosort_buffer_size", "library"); + subst("cusparseXcscsort", "rocsparse_cscsort", "library"); + subst("cusparseXcscsort_bufferSizeExt", "rocsparse_cscsort_buffer_size", "library"); + subst("cusparseXcsr2bsrNnz", "rocsparse_csr2bsr_nnz", "library"); + subst("cusparseXcsr2coo", "rocsparse_csr2coo", "library"); + subst("cusparseXcsr2gebsrNnz", "rocsparse_csr2gebsr_nnz", "library"); + subst("cusparseXcsrgeam2Nnz", "rocsparse_csrgeam_nnz", "library"); + subst("cusparseXcsrgeamNnz", "rocsparse_csrgeam_nnz", "library"); + subst("cusparseXcsrgemm2Nnz", "rocsparse_csrgemm_nnz", "library"); + subst("cusparseXcsric02_zeroPivot", "rocsparse_csric0_zero_pivot", "library"); + 
subst("cusparseXcsrilu02_zeroPivot", "rocsparse_csrilu0_zero_pivot", "library"); + subst("cusparseXcsrsm2_zeroPivot", "rocsparse_csrsm_zero_pivot", "library"); + subst("cusparseXcsrsort", "rocsparse_csrsort", "library"); + subst("cusparseXcsrsort_bufferSizeExt", "rocsparse_csrsort_buffer_size", "library"); + subst("cusparseXcsrsv2_zeroPivot", "rocsparse_csrsv_zero_pivot", "library"); + subst("cusparseXgebsr2gebsrNnz", "rocsparse_gebsr2gebsr_nnz", "library"); + subst("cusparseZaxpyi", "rocsparse_zaxpyi", "library"); + subst("cusparseZbsr2csr", "rocsparse_zbsr2csr", "library"); + subst("cusparseZbsric02", "rocsparse_zbsric0", "library"); + subst("cusparseZbsric02_analysis", "rocsparse_zbsric0_analysis", "library"); + subst("cusparseZbsric02_bufferSize", "rocsparse_zbsric0_buffer_size", "library"); + subst("cusparseZbsrilu02", "rocsparse_zbsrilu0", "library"); + subst("cusparseZbsrilu02_analysis", "rocsparse_zbsrilu0_analysis", "library"); + subst("cusparseZbsrilu02_bufferSize", "rocsparse_zbsrilu0_buffer_size", "library"); + subst("cusparseZbsrilu02_numericBoost", "rocsparse_zbsrilu0_numeric_boost", "library"); + subst("cusparseZbsrmm", "rocsparse_zbsrmm", "library"); + subst("cusparseZbsrmv", "rocsparse_zbsrmv", "library"); + subst("cusparseZbsrsm2_analysis", "rocsparse_zbsrsm_analysis", "library"); + subst("cusparseZbsrsm2_bufferSize", "rocsparse_zbsrsm_buffer_size", "library"); + subst("cusparseZbsrsm2_solve", "rocsparse_zbsrsm_solve", "library"); + subst("cusparseZbsrsv2_analysis", "rocsparse_zbsrsv_analysis", "library"); + subst("cusparseZbsrsv2_bufferSize", "rocsparse_zbsrsv_buffer_size", "library"); + subst("cusparseZbsrsv2_bufferSizeExt", "rocsparse_zbsrsv_buffer_size", "library"); + subst("cusparseZbsrsv2_solve", "rocsparse_zbsrsv_solve", "library"); + subst("cusparseZbsrxmv", "rocsparse_zbsrxmv", "library"); + subst("cusparseZcsc2dense", "rocsparse_zcsc2dense", "library"); + subst("cusparseZcsr2bsr", "rocsparse_zcsr2bsr", "library"); + subst("cusparseZcsr2csr_compress", "rocsparse_zcsr2csr_compress", "library"); + subst("cusparseZcsr2dense", "rocsparse_zcsr2dense", "library"); + subst("cusparseZcsr2gebsr", "rocsparse_zcsr2gebsr", "library"); + subst("cusparseZcsr2gebsr_bufferSize", "rocsparse_zcsr2gebsr_buffer_size", "library"); + subst("cusparseZcsr2hyb", "rocsparse_zcsr2hyb", "library"); + subst("cusparseZcsrcolor", "rocsparse_zcsrcolor", "library"); + subst("cusparseZcsrgeam", "rocsparse_zcsrgeam", "library"); + subst("cusparseZcsrgeam2", "rocsparse_zcsrgeam", "library"); + subst("cusparseZcsrgemm2", "rocsparse_zcsrgemm", "library"); + subst("cusparseZcsrgemm2_bufferSizeExt", "rocsparse_zcsrgemm_buffer_size", "library"); + subst("cusparseZcsric02", "rocsparse_zcsric0", "library"); + subst("cusparseZcsric02_analysis", "rocsparse_zcsric0_analysis", "library"); + subst("cusparseZcsric02_bufferSize", "rocsparse_zcsric0_buffer_size", "library"); + subst("cusparseZcsric02_bufferSizeExt", "rocsparse_zcsric0_buffer_size", "library"); + subst("cusparseZcsrilu02", "rocsparse_zcsrilu0", "library"); + subst("cusparseZcsrilu02_analysis", "rocsparse_zcsrilu0_analysis", "library"); + subst("cusparseZcsrilu02_bufferSize", "rocsparse_zcsrilu0_buffer_size", "library"); + subst("cusparseZcsrilu02_bufferSizeExt", "rocsparse_zcsrilu0_buffer_size", "library"); + subst("cusparseZcsrilu02_numericBoost", "rocsparse_zcsrilu0_numeric_boost", "library"); + subst("cusparseZcsrmm", "rocsparse_zcsrmm", "library"); + subst("cusparseZcsrmm2", "rocsparse_zcsrmm", "library"); + subst("cusparseZcsrmv", 
"rocsparse_zcsrmv", "library"); + subst("cusparseZcsrsm2_analysis", "rocsparse_zcsrsm_analysis", "library"); + subst("cusparseZcsrsm2_bufferSizeExt", "rocsparse_zcsrsm_buffer_size", "library"); + subst("cusparseZcsrsm2_solve", "rocsparse_zcsrsm_solve", "library"); + subst("cusparseZcsrsv2_analysis", "rocsparse_zcsrsv_analysis", "library"); + subst("cusparseZcsrsv2_bufferSize", "rocsparse_zcsrsv_buffer_size", "library"); + subst("cusparseZcsrsv2_bufferSizeExt", "rocsparse_zcsrsv_buffer_size", "library"); + subst("cusparseZcsrsv2_solve", "rocsparse_zcsrsv_solve", "library"); + subst("cusparseZdense2csc", "rocsparse_zdense2csc", "library"); + subst("cusparseZdense2csr", "rocsparse_zdense2csr", "library"); + subst("cusparseZdotci", "rocsparse_zdotci", "library"); + subst("cusparseZdoti", "rocsparse_zdoti", "library"); + subst("cusparseZgebsr2csr", "rocsparse_zgebsr2csr", "library"); + subst("cusparseZgebsr2gebsc", "rocsparse_zgebsr2gebsc", "library"); + subst("cusparseZgebsr2gebsc_bufferSize", "rocsparse_zgebsr2gebsc_buffer_size", "library"); + subst("cusparseZgebsr2gebsr", "rocsparse_zgebsr2gebsr", "library"); + subst("cusparseZgebsr2gebsr_bufferSize", "rocsparse_zgebsr2gebsr_buffer_size", "library"); + subst("cusparseZgemvi", "rocsparse_zgemvi", "library"); + subst("cusparseZgemvi_bufferSize", "rocsparse_zgemvi_buffer_size", "library"); + subst("cusparseZgpsvInterleavedBatch", "rocsparse_zgpsv_interleaved_batch", "library"); + subst("cusparseZgpsvInterleavedBatch_bufferSizeExt", "rocsparse_zgpsv_interleaved_batch_buffer_size", "library"); + subst("cusparseZgthr", "rocsparse_zgthr", "library"); + subst("cusparseZgthrz", "rocsparse_zgthrz", "library"); + subst("cusparseZgtsv2", "rocsparse_zgtsv", "library"); + subst("cusparseZgtsv2StridedBatch", "rocsparse_zgtsv_no_pivot_strided_batch", "library"); + subst("cusparseZgtsv2StridedBatch_bufferSizeExt", "rocsparse_zgtsv_no_pivot_strided_batch_buffer_size", "library"); + subst("cusparseZgtsv2_bufferSizeExt", "rocsparse_zgtsv_buffer_size", "library"); + subst("cusparseZgtsv2_nopivot", "rocsparse_zgtsv_no_pivot", "library"); + subst("cusparseZgtsv2_nopivot_bufferSizeExt", "rocsparse_zgtsv_no_pivot_buffer_size", "library"); + subst("cusparseZgtsvInterleavedBatch", "rocsparse_zgtsv_interleaved_batch", "library"); + subst("cusparseZgtsvInterleavedBatch_bufferSizeExt", "rocsparse_zgtsv_interleaved_batch_buffer_size", "library"); + subst("cusparseZhybmv", "rocsparse_zhybmv", "library"); + subst("cusparseZnnz", "rocsparse_znnz", "library"); + subst("cusparseZnnz_compress", "rocsparse_znnz_compress", "library"); + subst("cusparseZsctr", "rocsparse_zsctr", "library"); + subst("__half", "rocblas_half", "device_type"); + subst("__nv_bfloat16", "rocblas_bfloat16", "device_type"); + subst("cublas.h", "rocblas.h", "include_cuda_main_header"); + subst("cublas_v2.h", "rocblas.h", "include_cuda_main_header_v2"); + subst("bsric02Info", "_rocsparse_mat_info", "type"); + subst("bsric02Info_t", "rocsparse_mat_info", "type"); + subst("bsrilu02Info", "_rocsparse_mat_info", "type"); + subst("bsrilu02Info_t", "rocsparse_mat_info", "type"); + subst("bsrsm2Info", "_rocsparse_mat_info", "type"); + subst("bsrsm2Info_t", "rocsparse_mat_info", "type"); + subst("bsrsv2Info", "_rocsparse_mat_info", "type"); + subst("bsrsv2Info_t", "rocsparse_mat_info", "type"); + subst("csrgemm2Info", "_rocsparse_mat_info", "type"); + subst("csrgemm2Info_t", "rocsparse_mat_info", "type"); + subst("csric02Info", "_rocsparse_mat_info", "type"); + subst("csric02Info_t", "rocsparse_mat_info", "type"); 
+ subst("csrilu02Info", "_rocsparse_mat_info", "type"); + subst("csrilu02Info_t", "rocsparse_mat_info", "type"); + subst("csrsm2Info", "_rocsparse_mat_info", "type"); + subst("csrsm2Info_t", "rocsparse_mat_info", "type"); + subst("csrsv2Info", "_rocsparse_mat_descr", "type"); + subst("csrsv2Info_t", "rocsparse_mat_descr", "type"); + subst("cuComplex", "rocblas_float_complex", "type"); + subst("cuDoubleComplex", "rocblas_double_complex", "type"); + subst("cuFloatComplex", "rocblas_float_complex", "type"); + subst("cublasAtomicsMode_t", "rocblas_atomics_mode", "type"); + subst("cublasComputeType_t", "rocblas_computetype", "type"); + subst("cublasContext", "_rocblas_handle", "type"); + subst("cublasDataType_t", "rocblas_datatype", "type"); + subst("cublasDiagType_t", "rocblas_diagonal", "type"); + subst("cublasFillMode_t", "rocblas_fill", "type"); + subst("cublasGemmAlgo_t", "rocblas_gemm_algo", "type"); + subst("cublasHandle_t", "rocblas_handle", "type"); + subst("cublasMath_t", "rocblas_math_mode", "type"); + subst("cublasOperation_t", "rocblas_operation", "type"); + subst("cublasPointerMode_t", "rocblas_pointer_mode", "type"); + subst("cublasSideMode_t", "rocblas_side", "type"); + subst("cublasStatus", "rocblas_status", "type"); + subst("cublasStatus_t", "rocblas_status", "type"); + subst("cudaDataType", "rocblas_datatype", "type"); + subst("cudaDataType_t", "rocblas_datatype_", "type"); + subst("cudnnActivationDescriptor_t", "miopenActivationDescriptor_t", "type"); + subst("cudnnActivationMode_t", "miopenActivationMode_t", "type"); + subst("cudnnBatchNormMode_t", "miopenBatchNormMode_t", "type"); + subst("cudnnCTCLossAlgo_t", "miopenCTCLossAlgo_t", "type"); + subst("cudnnCTCLossDescriptor_t", "miopenCTCLossDescriptor_t", "type"); + subst("cudnnConvolutionBwdDataAlgoPerfStruct", "miopenConvAlgoPerf_t", "type"); + subst("cudnnConvolutionBwdDataAlgoPerf_t", "miopenConvAlgoPerf_t", "type"); + subst("cudnnConvolutionBwdDataAlgo_t", "miopenConvBwdDataAlgorithm_t", "type"); + subst("cudnnConvolutionDescriptor_t", "miopenConvolutionDescriptor_t", "type"); + subst("cudnnConvolutionFwdAlgoPerfStruct", "miopenConvAlgoPerf_t", "type"); + subst("cudnnConvolutionFwdAlgoPerf_t", "miopenConvAlgoPerf_t", "type"); + subst("cudnnConvolutionFwdAlgo_t", "miopenConvFwdAlgorithm_t", "type"); + subst("cudnnConvolutionMode_t", "miopenConvolutionMode_t", "type"); + subst("cudnnDataType_t", "miopenDataType_t", "type"); + subst("cudnnDirectionMode_t", "miopenRNNDirectionMode_t", "type"); + subst("cudnnDropoutDescriptor_t", "miopenDropoutDescriptor_t", "type"); + subst("cudnnFilterDescriptor_t", "miopenTensorDescriptor_t", "type"); + subst("cudnnHandle_t", "miopenHandle_t", "type"); + subst("cudnnIndicesType_t", "miopenIndicesType_t", "type"); + subst("cudnnLRNDescriptor_t", "miopenLRNDescriptor_t", "type"); + subst("cudnnLRNMode_t", "miopenLRNMode_t", "type"); + subst("cudnnNanPropagation_t", "miopenNanPropagation_t", "type"); + subst("cudnnOpTensorOp_t", "miopenTensorOp_t", "type"); + subst("cudnnPoolingDescriptor_t", "miopenPoolingDescriptor_t", "type"); + subst("cudnnPoolingMode_t", "miopenPoolingMode_t", "type"); + subst("cudnnRNNAlgo_t", "miopenRNNAlgo_t", "type"); + subst("cudnnRNNBiasMode_t", "miopenRNNBiasMode_t", "type"); + subst("cudnnRNNDescriptor_t", "miopenRNNDescriptor_t", "type"); + subst("cudnnRNNInputMode_t", "miopenRNNInputMode_t", "type"); + subst("cudnnRNNMode_t", "miopenRNNMode_t", "type"); + subst("cudnnReduceTensorDescriptor_t", "miopenReduceTensorDescriptor_t", "type"); + 
subst("cudnnReduceTensorIndices_t", "miopenReduceTensorIndices_t", "type"); + subst("cudnnReduceTensorOp_t", "miopenReduceTensorOp_t", "type"); + subst("cudnnSoftmaxAlgorithm_t", "miopenSoftmaxAlgorithm_t", "type"); + subst("cudnnSoftmaxMode_t", "miopenSoftmaxMode_t", "type"); + subst("cudnnStatus_t", "miopenStatus_t", "type"); + subst("cudnnTensorDescriptor_t", "miopenTensorDescriptor_t", "type"); + subst("cusolverDnHandle_t", "rocblas_handle", "type"); + subst("cusolverEigMode_t", "rocblas_evect", "type"); + subst("cusolverEigRange_t", "rocblas_erange", "type"); + subst("cusolverEigType_t", "rocblas_eform", "type"); + subst("cusolverStatus_t", "rocblas_status", "type"); + subst("cusparseAction_t", "rocsparse_action", "type"); + subst("cusparseColorInfo", "_rocsparse_color_info", "type"); + subst("cusparseColorInfo_t", "rocsparse_color_info", "type"); + subst("cusparseConstDnMatDescr_t", "rocsparse_const_dnmat_descr", "type"); + subst("cusparseConstDnVecDescr_t", "rocsparse_const_dnvec_descr", "type"); + subst("cusparseConstSpMatDescr_t", "rocsparse_const_spmat_descr", "type"); + subst("cusparseConstSpVecDescr_t", "rocsparse_const_spvec_descr", "type"); + subst("cusparseContext", "_rocsparse_handle", "type"); + subst("cusparseDenseToSparseAlg_t", "rocsparse_dense_to_sparse_alg", "type"); + subst("cusparseDiagType_t", "rocsparse_diag_type", "type"); + subst("cusparseDirection_t", "rocsparse_direction", "type"); + subst("cusparseDnMatDescr", "_rocsparse_dnmat_descr", "type"); + subst("cusparseDnMatDescr_t", "rocsparse_dnmat_descr", "type"); + subst("cusparseDnVecDescr", "_rocsparse_dnvec_descr", "type"); + subst("cusparseDnVecDescr_t", "rocsparse_dnvec_descr", "type"); + subst("cusparseFillMode_t", "rocsparse_fill_mode", "type"); + subst("cusparseFormat_t", "rocsparse_format", "type"); + subst("cusparseHandle_t", "rocsparse_handle", "type"); + subst("cusparseHybMat", "_rocsparse_hyb_mat", "type"); + subst("cusparseHybMat_t", "rocsparse_hyb_mat", "type"); + subst("cusparseHybPartition_t", "rocsparse_hyb_partition", "type"); + subst("cusparseIndexBase_t", "rocsparse_index_base", "type"); + subst("cusparseIndexType_t", "rocsparse_indextype", "type"); + subst("cusparseMatDescr", "_rocsparse_mat_descr", "type"); + subst("cusparseMatDescr_t", "rocsparse_mat_descr", "type"); + subst("cusparseMatrixType_t", "rocsparse_matrix_type", "type"); + subst("cusparseOperation_t", "rocsparse_operation", "type"); + subst("cusparseOrder_t", "rocsparse_order", "type"); + subst("cusparsePointerMode_t", "rocsparse_pointer_mode", "type"); + subst("cusparseSDDMMAlg_t", "rocsparse_sddmm_alg", "type"); + subst("cusparseSolvePolicy_t", "rocsparse_solve_policy", "type"); + subst("cusparseSpGEMMAlg_t", "rocsparse_spgemm_alg", "type"); + subst("cusparseSpMMAlg_t", "rocsparse_spmm_alg", "type"); + subst("cusparseSpMVAlg_t", "rocsparse_spmv_alg", "type"); + subst("cusparseSpMatAttribute_t", "rocsparse_spmat_attribute", "type"); + subst("cusparseSpMatDescr", "_rocsparse_spmat_descr", "type"); + subst("cusparseSpMatDescr_t", "rocsparse_spmat_descr", "type"); + subst("cusparseSpSMAlg_t", "rocsparse_spsm_alg", "type"); + subst("cusparseSpSVAlg_t", "rocsparse_spsv_alg", "type"); + subst("cusparseSpVecDescr", "_rocsparse_spvec_descr", "type"); + subst("cusparseSpVecDescr_t", "rocsparse_spvec_descr", "type"); + subst("cusparseSparseToDenseAlg_t", "rocsparse_sparse_to_dense_alg", "type"); + subst("cusparseStatus_t", "rocsparse_status", "type"); + subst("pruneInfo", "_rocsparse_mat_info", "type"); + subst("pruneInfo_t", 
"rocsparse_mat_info", "type"); + subst("CUBLAS_ATOMICS_ALLOWED", "rocblas_atomics_allowed", "numeric_literal"); + subst("CUBLAS_ATOMICS_NOT_ALLOWED", "rocblas_atomics_not_allowed", "numeric_literal"); + subst("CUBLAS_COMPUTE_32F", "rocblas_compute_type_f32", "numeric_literal"); + subst("CUBLAS_DEFAULT_MATH", "rocblas_default_math", "numeric_literal"); + subst("CUBLAS_DIAG_NON_UNIT", "rocblas_diagonal_non_unit", "numeric_literal"); + subst("CUBLAS_DIAG_UNIT", "rocblas_diagonal_unit", "numeric_literal"); + subst("CUBLAS_FILL_MODE_FULL", "rocblas_fill_full", "numeric_literal"); + subst("CUBLAS_FILL_MODE_LOWER", "rocblas_fill_lower", "numeric_literal"); + subst("CUBLAS_FILL_MODE_UPPER", "rocblas_fill_upper", "numeric_literal"); + subst("CUBLAS_GEMM_DEFAULT", "rocblas_gemm_algo_standard", "numeric_literal"); + subst("CUBLAS_GEMM_DFALT", "rocblas_gemm_algo_standard", "numeric_literal"); + subst("CUBLAS_OP_C", "rocblas_operation_conjugate_transpose", "numeric_literal"); + subst("CUBLAS_OP_HERMITAN", "rocblas_operation_conjugate_transpose", "numeric_literal"); + subst("CUBLAS_OP_N", "rocblas_operation_none", "numeric_literal"); + subst("CUBLAS_OP_T", "rocblas_operation_transpose", "numeric_literal"); + subst("CUBLAS_POINTER_MODE_DEVICE", "rocblas_pointer_mode_device", "numeric_literal"); + subst("CUBLAS_POINTER_MODE_HOST", "rocblas_pointer_mode_host", "numeric_literal"); + subst("CUBLAS_SIDE_LEFT", "rocblas_side_left", "numeric_literal"); + subst("CUBLAS_SIDE_RIGHT", "rocblas_side_right", "numeric_literal"); + subst("CUBLAS_STATUS_ALLOC_FAILED", "rocblas_status_not_implemented", "numeric_literal"); + subst("CUBLAS_STATUS_ARCH_MISMATCH", "rocblas_status_arch_mismatch", "numeric_literal"); + subst("CUBLAS_STATUS_EXECUTION_FAILED", "rocblas_status_memory_error", "numeric_literal"); + subst("CUBLAS_STATUS_INTERNAL_ERROR", "rocblas_status_internal_error", "numeric_literal"); + subst("CUBLAS_STATUS_INVALID_VALUE", "rocblas_status_invalid_value", "numeric_literal"); + subst("CUBLAS_STATUS_MAPPING_ERROR", "rocblas_status_invalid_size", "numeric_literal"); + subst("CUBLAS_STATUS_NOT_INITIALIZED", "rocblas_status_invalid_handle", "numeric_literal"); + subst("CUBLAS_STATUS_NOT_SUPPORTED", "rocblas_status_perf_degraded", "numeric_literal"); + subst("CUBLAS_STATUS_SUCCESS", "rocblas_status_success", "numeric_literal"); + subst("CUDA_C_16BF", "rocblas_datatype_bf16_c", "numeric_literal"); + subst("CUDA_C_16F", "rocblas_datatype_f16_c", "numeric_literal"); + subst("CUDA_C_32F", "rocblas_datatype_f32_c", "numeric_literal"); + subst("CUDA_C_32I", "rocblas_datatype_i32_c", "numeric_literal"); + subst("CUDA_C_32U", "rocblas_datatype_u32_c", "numeric_literal"); + subst("CUDA_C_64F", "rocblas_datatype_f64_c", "numeric_literal"); + subst("CUDA_C_8I", "rocblas_datatype_i8_c", "numeric_literal"); + subst("CUDA_C_8U", "rocblas_datatype_u8_c", "numeric_literal"); + subst("CUDA_R_16BF", "rocblas_datatype_bf16_r", "numeric_literal"); + subst("CUDA_R_16F", "rocblas_datatype_f16_r", "numeric_literal"); + subst("CUDA_R_32F", "rocblas_datatype_f32_r", "numeric_literal"); + subst("CUDA_R_32I", "rocblas_datatype_i32_r", "numeric_literal"); + subst("CUDA_R_32U", "rocblas_datatype_u32_r", "numeric_literal"); + subst("CUDA_R_64F", "rocblas_datatype_f64_r", "numeric_literal"); + subst("CUDA_R_8I", "rocblas_datatype_i8_r", "numeric_literal"); + subst("CUDA_R_8U", "rocblas_datatype_u8_r", "numeric_literal"); + subst("CUDNN_16BIT_INDICES", "MIOPEN_16BIT_INDICES", "numeric_literal"); + subst("CUDNN_32BIT_INDICES", "MIOPEN_32BIT_INDICES", 
"numeric_literal"); + subst("CUDNN_64BIT_INDICES", "MIOPEN_64BIT_INDICES", "numeric_literal"); + subst("CUDNN_8BIT_INDICES", "MIOPEN_8BIT_INDICES", "numeric_literal"); + subst("CUDNN_ACTIVATION_CLIPPED_RELU", "miopenActivationCLIPPEDRELU", "numeric_literal"); + subst("CUDNN_ACTIVATION_ELU", "miopenActivationELU", "numeric_literal"); + subst("CUDNN_ACTIVATION_IDENTITY", "miopenActivationPASTHRU", "numeric_literal"); + subst("CUDNN_ACTIVATION_RELU", "miopenActivationRELU", "numeric_literal"); + subst("CUDNN_ACTIVATION_TANH", "miopenActivationTANH", "numeric_literal"); + subst("CUDNN_BATCHNORM_PER_ACTIVATION", "miopenBNPerActivation", "numeric_literal"); + subst("CUDNN_BATCHNORM_SPATIAL", "miopenBNSpatial", "numeric_literal"); + subst("CUDNN_BIDIRECTIONAL", "miopenRNNbidirection", "numeric_literal"); + subst("CUDNN_CONVOLUTION_BWD_DATA_ALGO_0", "miopenConvolutionBwdDataAlgoGEMM", "numeric_literal"); + subst("CUDNN_CONVOLUTION_BWD_DATA_ALGO_1", "miopenConvolutionBwdDataAlgoDirect", "numeric_literal"); + subst("CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT", "miopenConvolutionBwdDataAlgoFFT", "numeric_literal"); + subst("CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD", "miopenConvolutionBwdDataAlgoWinograd", "numeric_literal"); + subst("CUDNN_CONVOLUTION_FWD_ALGO_DIRECT", "miopenConvolutionFwdAlgoDirect", "numeric_literal"); + subst("CUDNN_CONVOLUTION_FWD_ALGO_FFT", "miopenConvolutionFwdAlgoFFT", "numeric_literal"); + subst("CUDNN_CONVOLUTION_FWD_ALGO_GEMM", "miopenConvolutionFwdAlgoGEMM", "numeric_literal"); + subst("CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM", "miopenConvolutionFwdAlgoImplicitGEMM", "numeric_literal"); + subst("CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD", "miopenConvolutionFwdAlgoWinograd", "numeric_literal"); + subst("CUDNN_CTC_LOSS_ALGO_DETERMINISTIC", "MIOPEN_CTC_LOSS_ALGO_DETERMINISTIC", "numeric_literal"); + subst("CUDNN_DATA_BFLOAT16", "miopenBFloat16", "numeric_literal"); + subst("CUDNN_DATA_DOUBLE", "miopenDouble", "numeric_literal"); + subst("CUDNN_DATA_FLOAT", "miopenFloat", "numeric_literal"); + subst("CUDNN_DATA_HALF", "miopenHalf", "numeric_literal"); + subst("CUDNN_DATA_INT32", "miopenInt32", "numeric_literal"); + subst("CUDNN_DATA_INT8", "miopenInt8", "numeric_literal"); + subst("CUDNN_DATA_INT8x4", "miopenInt8x4", "numeric_literal"); + subst("CUDNN_GRU", "miopenGRU", "numeric_literal"); + subst("CUDNN_LINEAR_INPUT", "miopenRNNlinear", "numeric_literal"); + subst("CUDNN_LRN_CROSS_CHANNEL_DIM1", "miopenLRNCrossChannel", "numeric_literal"); + subst("CUDNN_LSTM", "miopenLSTM", "numeric_literal"); + subst("CUDNN_NOT_PROPAGATE_NAN", "MIOPEN_NOT_PROPAGATE_NAN", "numeric_literal"); + subst("CUDNN_OP_TENSOR_ADD", "miopenTensorOpAdd", "numeric_literal"); + subst("CUDNN_OP_TENSOR_MAX", "miopenTensorOpMax", "numeric_literal"); + subst("CUDNN_OP_TENSOR_MIN", "miopenTensorOpMin", "numeric_literal"); + subst("CUDNN_OP_TENSOR_MUL", "miopenTensorOpMul", "numeric_literal"); + subst("CUDNN_POOLING_MAX", "miopenPoolingMax", "numeric_literal"); + subst("CUDNN_PROPAGATE_NAN", "MIOPEN_PROPAGATE_NAN", "numeric_literal"); + subst("CUDNN_REDUCE_TENSOR_ADD", "MIOPEN_REDUCE_TENSOR_ADD", "numeric_literal"); + subst("CUDNN_REDUCE_TENSOR_AMAX", "MIOPEN_REDUCE_TENSOR_AMAX", "numeric_literal"); + subst("CUDNN_REDUCE_TENSOR_AVG", "MIOPEN_REDUCE_TENSOR_AVG", "numeric_literal"); + subst("CUDNN_REDUCE_TENSOR_FLATTENED_INDICES", "MIOPEN_REDUCE_TENSOR_FLATTENED_INDICES", "numeric_literal"); + subst("CUDNN_REDUCE_TENSOR_MAX", "MIOPEN_REDUCE_TENSOR_MAX", "numeric_literal"); + subst("CUDNN_REDUCE_TENSOR_MIN", 
"MIOPEN_REDUCE_TENSOR_MIN", "numeric_literal"); + subst("CUDNN_REDUCE_TENSOR_MUL", "MIOPEN_REDUCE_TENSOR_MUL", "numeric_literal"); + subst("CUDNN_REDUCE_TENSOR_NORM1", "MIOPEN_REDUCE_TENSOR_NORM1", "numeric_literal"); + subst("CUDNN_REDUCE_TENSOR_NORM2", "MIOPEN_REDUCE_TENSOR_NORM2", "numeric_literal"); + subst("CUDNN_REDUCE_TENSOR_NO_INDICES", "MIOPEN_REDUCE_TENSOR_NO_INDICES", "numeric_literal"); + subst("CUDNN_RNN_ALGO_STANDARD", "miopenRNNdefault", "numeric_literal"); + subst("CUDNN_RNN_DOUBLE_BIAS", "miopenRNNwithBias", "numeric_literal"); + subst("CUDNN_RNN_NO_BIAS", "miopenRNNNoBias", "numeric_literal"); + subst("CUDNN_RNN_RELU", "miopenRNNRELU", "numeric_literal"); + subst("CUDNN_RNN_SINGLE_INP_BIAS", "miopenRNNwithBias", "numeric_literal"); + subst("CUDNN_RNN_SINGLE_REC_BIAS", "miopenRNNwithBias", "numeric_literal"); + subst("CUDNN_RNN_TANH", "miopenRNNTANH", "numeric_literal"); + subst("CUDNN_SKIP_INPUT", "miopenRNNskip", "numeric_literal"); + subst("CUDNN_SOFTMAX_ACCURATE", "MIOPEN_SOFTMAX_ACCURATE", "numeric_literal"); + subst("CUDNN_SOFTMAX_FAST", "MIOPEN_SOFTMAX_FAST", "numeric_literal"); + subst("CUDNN_SOFTMAX_LOG", "MIOPEN_SOFTMAX_LOG", "numeric_literal"); + subst("CUDNN_SOFTMAX_MODE_CHANNEL", "MIOPEN_SOFTMAX_MODE_CHANNEL", "numeric_literal"); + subst("CUDNN_SOFTMAX_MODE_INSTANCE", "MIOPEN_SOFTMAX_MODE_INSTANCE", "numeric_literal"); + subst("CUDNN_STATUS_ALLOC_FAILED", "miopenStatusAllocFailed", "numeric_literal"); + subst("CUDNN_STATUS_BAD_PARAM", "miopenStatusBadParm", "numeric_literal"); + subst("CUDNN_STATUS_INTERNAL_ERROR", "miopenStatusInternalError", "numeric_literal"); + subst("CUDNN_STATUS_INVALID_VALUE", "miopenStatusInvalidValue", "numeric_literal"); + subst("CUDNN_STATUS_NOT_INITIALIZED", "miopenStatusNotInitialized", "numeric_literal"); + subst("CUDNN_STATUS_NOT_SUPPORTED", "miopenStatusUnsupportedOp", "numeric_literal"); + subst("CUDNN_STATUS_SUCCESS", "miopenStatusSuccess", "numeric_literal"); + subst("CUDNN_UNIDIRECTIONAL", "miopenRNNunidirection", "numeric_literal"); + subst("CUSOLVER_EIG_MODE_NOVECTOR", "rocblas_evect_none", "numeric_literal"); + subst("CUSOLVER_EIG_MODE_VECTOR", "rocblas_evect_original", "numeric_literal"); + subst("CUSOLVER_EIG_RANGE_ALL", "rocblas_erange_all", "numeric_literal"); + subst("CUSOLVER_EIG_RANGE_I", "rocblas_erange_index", "numeric_literal"); + subst("CUSOLVER_EIG_RANGE_V", "rocblas_erange_value", "numeric_literal"); + subst("CUSOLVER_EIG_TYPE_1", "rocblas_eform_ax", "numeric_literal"); + subst("CUSOLVER_EIG_TYPE_2", "rocblas_eform_abx", "numeric_literal"); + subst("CUSOLVER_EIG_TYPE_3", "rocblas_eform_bax", "numeric_literal"); + subst("CUSOLVER_STATUS_ALLOC_FAILED", "rocblas_status_memory_error", "numeric_literal"); + subst("CUSOLVER_STATUS_ARCH_MISMATCH", "rocblas_status_arch_mismatch", "numeric_literal"); + subst("CUSOLVER_STATUS_EXECUTION_FAILED", "rocblas_status_not_implemented", "numeric_literal"); + subst("CUSOLVER_STATUS_INTERNAL_ERROR", "rocblas_status_internal_error", "numeric_literal"); + subst("CUSOLVER_STATUS_INVALID_VALUE", "rocblas_status_invalid_value", "numeric_literal"); + subst("CUSOLVER_STATUS_MAPPING_ERROR", "rocblas_status_not_implemented", "numeric_literal"); + subst("CUSOLVER_STATUS_NOT_INITIALIZED", "rocblas_status_invalid_handle", "numeric_literal"); + subst("CUSOLVER_STATUS_NOT_SUPPORTED", "rocblas_status_not_implemented", "numeric_literal"); + subst("CUSOLVER_STATUS_SUCCESS", "rocblas_status_success", "numeric_literal"); + subst("CUSOLVER_STATUS_ZERO_PIVOT", "rocblas_status_not_implemented", 
"numeric_literal"); + subst("CUSPARSE_ACTION_NUMERIC", "rocsparse_action_numeric", "numeric_literal"); + subst("CUSPARSE_ACTION_SYMBOLIC", "rocsparse_action_symbolic", "numeric_literal"); + subst("CUSPARSE_DENSETOSPARSE_ALG_DEFAULT", "rocsparse_dense_to_sparse_alg_default", "numeric_literal"); + subst("CUSPARSE_DIAG_TYPE_NON_UNIT", "rocsparse_diag_type_non_unit", "numeric_literal"); + subst("CUSPARSE_DIAG_TYPE_UNIT", "rocsparse_diag_type_unit", "numeric_literal"); + subst("CUSPARSE_DIRECTION_COLUMN", "rocsparse_direction_column", "numeric_literal"); + subst("CUSPARSE_DIRECTION_ROW", "rocsparse_direction_row", "numeric_literal"); + subst("CUSPARSE_FILL_MODE_LOWER", "rocsparse_fill_mode_lower", "numeric_literal"); + subst("CUSPARSE_FILL_MODE_UPPER", "rocsparse_fill_mode_upper", "numeric_literal"); + subst("CUSPARSE_FORMAT_BLOCKED_ELL", "rocsparse_format_bell", "numeric_literal"); + subst("CUSPARSE_FORMAT_BSR", "rocsparse_format_bsr", "numeric_literal"); + subst("CUSPARSE_FORMAT_COO", "rocsparse_format_coo", "numeric_literal"); + subst("CUSPARSE_FORMAT_COO_AOS", "rocsparse_format_coo_aos", "numeric_literal"); + subst("CUSPARSE_FORMAT_CSC", "rocsparse_format_csc", "numeric_literal"); + subst("CUSPARSE_FORMAT_CSR", "rocsparse_format_csr", "numeric_literal"); + subst("CUSPARSE_FORMAT_SLICED_ELLPACK", "rocsparse_format_ell", "numeric_literal"); + subst("CUSPARSE_HYB_PARTITION_AUTO", "rocsparse_hyb_partition_auto", "numeric_literal"); + subst("CUSPARSE_HYB_PARTITION_MAX", "rocsparse_hyb_partition_max", "numeric_literal"); + subst("CUSPARSE_HYB_PARTITION_USER", "rocsparse_hyb_partition_user", "numeric_literal"); + subst("CUSPARSE_INDEX_16U", "rocsparse_indextype_u16", "numeric_literal"); + subst("CUSPARSE_INDEX_32I", "rocsparse_indextype_i32", "numeric_literal"); + subst("CUSPARSE_INDEX_64I", "rocsparse_indextype_i64", "numeric_literal"); + subst("CUSPARSE_INDEX_BASE_ONE", "rocsparse_index_base_one", "numeric_literal"); + subst("CUSPARSE_INDEX_BASE_ZERO", "rocsparse_index_base_zero", "numeric_literal"); + subst("CUSPARSE_MATRIX_TYPE_GENERAL", "rocsparse_matrix_type_general", "numeric_literal"); + subst("CUSPARSE_MATRIX_TYPE_HERMITIAN", "rocsparse_matrix_type_hermitian", "numeric_literal"); + subst("CUSPARSE_MATRIX_TYPE_SYMMETRIC", "rocsparse_matrix_type_symmetric", "numeric_literal"); + subst("CUSPARSE_MATRIX_TYPE_TRIANGULAR", "rocsparse_matrix_type_triangular", "numeric_literal"); + subst("CUSPARSE_OPERATION_CONJUGATE_TRANSPOSE", "rocsparse_operation_conjugate_transpose", "numeric_literal"); + subst("CUSPARSE_OPERATION_NON_TRANSPOSE", "rocsparse_operation_none", "numeric_literal"); + subst("CUSPARSE_OPERATION_TRANSPOSE", "rocsparse_operation_transpose", "numeric_literal"); + subst("CUSPARSE_ORDER_COL", "rocsparse_order_row", "numeric_literal"); + subst("CUSPARSE_ORDER_ROW", "rocsparse_order_column", "numeric_literal"); + subst("CUSPARSE_POINTER_MODE_DEVICE", "rocsparse_pointer_mode_device", "numeric_literal"); + subst("CUSPARSE_POINTER_MODE_HOST", "rocsparse_pointer_mode_host", "numeric_literal"); + subst("CUSPARSE_SDDMM_ALG_DEFAULT", "rocsparse_sddmm_alg_default", "numeric_literal"); + subst("CUSPARSE_SOLVE_POLICY_NO_LEVEL", "rocsparse_solve_policy_auto", "numeric_literal"); + subst("CUSPARSE_SOLVE_POLICY_USE_LEVEL", "rocsparse_solve_policy_auto", "numeric_literal"); + subst("CUSPARSE_SPARSETODENSE_ALG_DEFAULT", "rocsparse_sparse_to_dense_alg_default", "numeric_literal"); + subst("CUSPARSE_SPGEMM_DEFAULT", "rocsparse_spgemm_alg_default", "numeric_literal"); + subst("CUSPARSE_SPMAT_DIAG_TYPE", 
"rocsparse_spmat_diag_type", "numeric_literal"); + subst("CUSPARSE_SPMAT_FILL_MODE", "rocsparse_spmat_fill_mode", "numeric_literal"); + subst("CUSPARSE_SPMM_ALG_DEFAULT", "rocsparse_spmm_alg_default", "numeric_literal"); + subst("CUSPARSE_SPMM_BLOCKED_ELL_ALG1", "rocsparse_spmm_alg_bell", "numeric_literal"); + subst("CUSPARSE_SPMM_COO_ALG1", "rocsparse_spmm_alg_coo_segmented", "numeric_literal"); + subst("CUSPARSE_SPMM_COO_ALG2", "rocsparse_spmm_alg_coo_atomic", "numeric_literal"); + subst("CUSPARSE_SPMM_COO_ALG3", "rocsparse_spmm_alg_coo_segmented_atomic", "numeric_literal"); + subst("CUSPARSE_SPMM_CSR_ALG1", "rocsparse_spmm_alg_csr", "numeric_literal"); + subst("CUSPARSE_SPMM_CSR_ALG2", "rocsparse_spmm_alg_csr_row_split", "numeric_literal"); + subst("CUSPARSE_SPMM_CSR_ALG3", "rocsparse_spmm_alg_csr_merge", "numeric_literal"); + subst("CUSPARSE_SPMV_ALG_DEFAULT", "rocsparse_spmv_alg_default", "numeric_literal"); + subst("CUSPARSE_SPMV_COO_ALG1", "rocsparse_spmv_alg_coo", "numeric_literal"); + subst("CUSPARSE_SPMV_COO_ALG2", "rocsparse_spmv_alg_coo_atomic", "numeric_literal"); + subst("CUSPARSE_SPMV_CSR_ALG1", "rocsparse_spmv_alg_csr_adaptive", "numeric_literal"); + subst("CUSPARSE_SPMV_CSR_ALG2", "rocsparse_spmv_alg_csr_stream", "numeric_literal"); + subst("CUSPARSE_SPMV_SELL_ALG1", "rocsparse_spmv_alg_ell", "numeric_literal"); + subst("CUSPARSE_SPSM_ALG_DEFAULT", "rocsparse_spsm_alg_default", "numeric_literal"); + subst("CUSPARSE_SPSV_ALG_DEFAULT", "rocsparse_spsv_alg_default", "numeric_literal"); + subst("CUSPARSE_STATUS_ALLOC_FAILED", "rocsparse_status_memory_error", "numeric_literal"); + subst("CUSPARSE_STATUS_ARCH_MISMATCH", "rocsparse_status_arch_mismatch", "numeric_literal"); + subst("CUSPARSE_STATUS_INTERNAL_ERROR", "rocsparse_status_internal_error", "numeric_literal"); + subst("CUSPARSE_STATUS_INVALID_VALUE", "rocsparse_status_invalid_value", "numeric_literal"); + subst("CUSPARSE_STATUS_NOT_INITIALIZED", "rocsparse_status_not_initialized", "numeric_literal"); + subst("CUSPARSE_STATUS_NOT_SUPPORTED", "rocsparse_status_not_implemented", "numeric_literal"); + subst("CUSPARSE_STATUS_SUCCESS", "rocsparse_status_success", "numeric_literal"); + subst("CUSPARSE_STATUS_ZERO_PIVOT", "rocsparse_status_zero_pivot", "numeric_literal"); + subst("cusolver_int_t", "rocblas_int", "numeric_literal"); +} + +sub simpleSubstitutions { + subst("cuGetErrorName", "hipDrvGetErrorName", "error"); + subst("cuGetErrorString", "hipDrvGetErrorString", "error"); + subst("cudaGetErrorName", "hipGetErrorName", "error"); + subst("cudaGetErrorString", "hipGetErrorString", "error"); + subst("cudaGetLastError", "hipGetLastError", "error"); + subst("cudaPeekAtLastError", "hipPeekAtLastError", "error"); + subst("cuInit", "hipInit", "init"); + subst("cuDriverGetVersion", "hipDriverGetVersion", "version"); + subst("cudaDriverGetVersion", "hipDriverGetVersion", "version"); + subst("cudaRuntimeGetVersion", "hipRuntimeGetVersion", "version"); + subst("cuDeviceComputeCapability", "hipDeviceComputeCapability", "device"); + subst("cuDeviceGet", "hipDeviceGet", "device"); + subst("cuDeviceGetAttribute", "hipDeviceGetAttribute", "device"); + subst("cuDeviceGetCount", "hipGetDeviceCount", "device"); + subst("cuDeviceGetDefaultMemPool", "hipDeviceGetDefaultMemPool", "device"); + subst("cuDeviceGetMemPool", "hipDeviceGetMemPool", "device"); + subst("cuDeviceGetName", "hipDeviceGetName", "device"); + subst("cuDeviceGetUuid", "hipDeviceGetUuid", "device"); + subst("cuDeviceGetUuid_v2", "hipDeviceGetUuid", "device"); + 
subst("cuDeviceSetMemPool", "hipDeviceSetMemPool", "device"); + subst("cuDeviceTotalMem", "hipDeviceTotalMem", "device"); + subst("cuDeviceTotalMem_v2", "hipDeviceTotalMem", "device"); + subst("cudaChooseDevice", "hipChooseDevice", "device"); + subst("cudaDeviceGetAttribute", "hipDeviceGetAttribute", "device"); + subst("cudaDeviceGetByPCIBusId", "hipDeviceGetByPCIBusId", "device"); + subst("cudaDeviceGetCacheConfig", "hipDeviceGetCacheConfig", "device"); + subst("cudaDeviceGetDefaultMemPool", "hipDeviceGetDefaultMemPool", "device"); + subst("cudaDeviceGetLimit", "hipDeviceGetLimit", "device"); + subst("cudaDeviceGetMemPool", "hipDeviceGetMemPool", "device"); + subst("cudaDeviceGetP2PAttribute", "hipDeviceGetP2PAttribute", "device"); + subst("cudaDeviceGetPCIBusId", "hipDeviceGetPCIBusId", "device"); + subst("cudaDeviceGetSharedMemConfig", "hipDeviceGetSharedMemConfig", "device"); + subst("cudaDeviceGetStreamPriorityRange", "hipDeviceGetStreamPriorityRange", "device"); + subst("cudaDeviceReset", "hipDeviceReset", "device"); + subst("cudaDeviceSetCacheConfig", "hipDeviceSetCacheConfig", "device"); + subst("cudaDeviceSetLimit", "hipDeviceSetLimit", "device"); + subst("cudaDeviceSetMemPool", "hipDeviceSetMemPool", "device"); + subst("cudaDeviceSetSharedMemConfig", "hipDeviceSetSharedMemConfig", "device"); + subst("cudaDeviceSynchronize", "hipDeviceSynchronize", "device"); + subst("cudaGetDevice", "hipGetDevice", "device"); + subst("cudaGetDeviceCount", "hipGetDeviceCount", "device"); + subst("cudaGetDeviceFlags", "hipGetDeviceFlags", "device"); + subst("cudaGetDeviceProperties", "hipGetDeviceProperties", "device"); + subst("cudaIpcCloseMemHandle", "hipIpcCloseMemHandle", "device"); + subst("cudaIpcGetEventHandle", "hipIpcGetEventHandle", "device"); + subst("cudaIpcGetMemHandle", "hipIpcGetMemHandle", "device"); + subst("cudaIpcOpenEventHandle", "hipIpcOpenEventHandle", "device"); + subst("cudaIpcOpenMemHandle", "hipIpcOpenMemHandle", "device"); + subst("cudaSetDevice", "hipSetDevice", "device"); + subst("cudaSetDeviceFlags", "hipSetDeviceFlags", "device"); + subst("cuCtxCreate", "hipCtxCreate", "context"); + subst("cuCtxCreate_v2", "hipCtxCreate", "context"); + subst("cuCtxDestroy", "hipCtxDestroy", "context"); + subst("cuCtxDestroy_v2", "hipCtxDestroy", "context"); + subst("cuCtxGetApiVersion", "hipCtxGetApiVersion", "context"); + subst("cuCtxGetCacheConfig", "hipCtxGetCacheConfig", "context"); + subst("cuCtxGetCurrent", "hipCtxGetCurrent", "context"); + subst("cuCtxGetDevice", "hipCtxGetDevice", "context"); + subst("cuCtxGetFlags", "hipCtxGetFlags", "context"); + subst("cuCtxGetLimit", "hipDeviceGetLimit", "context"); + subst("cuCtxGetSharedMemConfig", "hipCtxGetSharedMemConfig", "context"); + subst("cuCtxGetStreamPriorityRange", "hipDeviceGetStreamPriorityRange", "context"); + subst("cuCtxPopCurrent", "hipCtxPopCurrent", "context"); + subst("cuCtxPopCurrent_v2", "hipCtxPopCurrent", "context"); + subst("cuCtxPushCurrent", "hipCtxPushCurrent", "context"); + subst("cuCtxPushCurrent_v2", "hipCtxPushCurrent", "context"); + subst("cuCtxSetCacheConfig", "hipCtxSetCacheConfig", "context"); + subst("cuCtxSetCurrent", "hipCtxSetCurrent", "context"); + subst("cuCtxSetLimit", "hipDeviceSetLimit", "context"); + subst("cuCtxSetSharedMemConfig", "hipCtxSetSharedMemConfig", "context"); + subst("cuCtxSynchronize", "hipCtxSynchronize", "context"); + subst("cuDevicePrimaryCtxGetState", "hipDevicePrimaryCtxGetState", "context"); + subst("cuDevicePrimaryCtxRelease", "hipDevicePrimaryCtxRelease", "context"); + 
subst("cuDevicePrimaryCtxRelease_v2", "hipDevicePrimaryCtxRelease", "context"); + subst("cuDevicePrimaryCtxReset", "hipDevicePrimaryCtxReset", "context"); + subst("cuDevicePrimaryCtxReset_v2", "hipDevicePrimaryCtxReset", "context"); + subst("cuDevicePrimaryCtxRetain", "hipDevicePrimaryCtxRetain", "context"); + subst("cuDevicePrimaryCtxSetFlags", "hipDevicePrimaryCtxSetFlags", "context"); + subst("cuDevicePrimaryCtxSetFlags_v2", "hipDevicePrimaryCtxSetFlags", "context"); + subst("cuLinkAddData", "hiprtcLinkAddData", "module"); + subst("cuLinkAddData_v2", "hiprtcLinkAddData", "module"); + subst("cuLinkAddFile", "hiprtcLinkAddFile", "module"); + subst("cuLinkAddFile_v2", "hiprtcLinkAddFile", "module"); + subst("cuLinkComplete", "hiprtcLinkComplete", "module"); + subst("cuLinkCreate", "hiprtcLinkCreate", "module"); + subst("cuLinkCreate_v2", "hiprtcLinkCreate", "module"); + subst("cuLinkDestroy", "hiprtcLinkDestroy", "module"); + subst("cuModuleGetFunction", "hipModuleGetFunction", "module"); + subst("cuModuleGetGlobal", "hipModuleGetGlobal", "module"); + subst("cuModuleGetGlobal_v2", "hipModuleGetGlobal", "module"); + subst("cuModuleGetTexRef", "hipModuleGetTexRef", "module"); + subst("cuModuleLoad", "hipModuleLoad", "module"); + subst("cuModuleLoadData", "hipModuleLoadData", "module"); + subst("cuModuleLoadDataEx", "hipModuleLoadDataEx", "module"); + subst("cuModuleUnload", "hipModuleUnload", "module"); + subst("cuArray3DCreate", "hipArray3DCreate", "memory"); + subst("cuArray3DCreate_v2", "hipArray3DCreate", "memory"); + subst("cuArray3DGetDescriptor", "hipArray3DGetDescriptor", "memory"); + subst("cuArray3DGetDescriptor_v2", "hipArray3DGetDescriptor", "memory"); + subst("cuArrayCreate", "hipArrayCreate", "memory"); + subst("cuArrayCreate_v2", "hipArrayCreate", "memory"); + subst("cuArrayDestroy", "hipArrayDestroy", "memory"); + subst("cuArrayGetDescriptor", "hipArrayGetDescriptor", "memory"); + subst("cuArrayGetDescriptor_v2", "hipArrayGetDescriptor", "memory"); + subst("cuDeviceGetByPCIBusId", "hipDeviceGetByPCIBusId", "memory"); + subst("cuDeviceGetPCIBusId", "hipDeviceGetPCIBusId", "memory"); + subst("cuIpcCloseMemHandle", "hipIpcCloseMemHandle", "memory"); + subst("cuIpcGetEventHandle", "hipIpcGetEventHandle", "memory"); + subst("cuIpcGetMemHandle", "hipIpcGetMemHandle", "memory"); + subst("cuIpcOpenEventHandle", "hipIpcOpenEventHandle", "memory"); + subst("cuIpcOpenMemHandle", "hipIpcOpenMemHandle", "memory"); + subst("cuMemAlloc", "hipMalloc", "memory"); + subst("cuMemAllocHost", "hipMemAllocHost", "memory"); + subst("cuMemAllocHost_v2", "hipMemAllocHost", "memory"); + subst("cuMemAllocManaged", "hipMallocManaged", "memory"); + subst("cuMemAllocPitch", "hipMemAllocPitch", "memory"); + subst("cuMemAllocPitch_v2", "hipMemAllocPitch", "memory"); + subst("cuMemAlloc_v2", "hipMalloc", "memory"); + subst("cuMemFree", "hipFree", "memory"); + subst("cuMemFreeHost", "hipHostFree", "memory"); + subst("cuMemFree_v2", "hipFree", "memory"); + subst("cuMemGetAddressRange", "hipMemGetAddressRange", "memory"); + subst("cuMemGetAddressRange_v2", "hipMemGetAddressRange", "memory"); + subst("cuMemGetInfo", "hipMemGetInfo", "memory"); + subst("cuMemGetInfo_v2", "hipMemGetInfo", "memory"); + subst("cuMemHostAlloc", "hipHostAlloc", "memory"); + subst("cuMemHostGetDevicePointer", "hipHostGetDevicePointer", "memory"); + subst("cuMemHostGetDevicePointer_v2", "hipHostGetDevicePointer", "memory"); + subst("cuMemHostGetFlags", "hipHostGetFlags", "memory"); + subst("cuMemHostRegister", "hipHostRegister", 
"memory"); + subst("cuMemHostRegister_v2", "hipHostRegister", "memory"); + subst("cuMemHostUnregister", "hipHostUnregister", "memory"); + subst("cuMemcpy2D", "hipMemcpyParam2D", "memory"); + subst("cuMemcpy2DAsync", "hipMemcpyParam2DAsync", "memory"); + subst("cuMemcpy2DAsync_v2", "hipMemcpyParam2DAsync", "memory"); + subst("cuMemcpy2DUnaligned", "hipDrvMemcpy2DUnaligned", "memory"); + subst("cuMemcpy2DUnaligned_v2", "hipDrvMemcpy2DUnaligned", "memory"); + subst("cuMemcpy2D_v2", "hipMemcpyParam2D", "memory"); + subst("cuMemcpy3D", "hipDrvMemcpy3D", "memory"); + subst("cuMemcpy3DAsync", "hipDrvMemcpy3DAsync", "memory"); + subst("cuMemcpy3DAsync_v2", "hipDrvMemcpy3DAsync", "memory"); + subst("cuMemcpy3D_v2", "hipDrvMemcpy3D", "memory"); + subst("cuMemcpyAtoH", "hipMemcpyAtoH", "memory"); + subst("cuMemcpyAtoH_v2", "hipMemcpyAtoH", "memory"); + subst("cuMemcpyDtoD", "hipMemcpyDtoD", "memory"); + subst("cuMemcpyDtoDAsync", "hipMemcpyDtoDAsync", "memory"); + subst("cuMemcpyDtoDAsync_v2", "hipMemcpyDtoDAsync", "memory"); + subst("cuMemcpyDtoD_v2", "hipMemcpyDtoD", "memory"); + subst("cuMemcpyDtoH", "hipMemcpyDtoH", "memory"); + subst("cuMemcpyDtoHAsync", "hipMemcpyDtoHAsync", "memory"); + subst("cuMemcpyDtoHAsync_v2", "hipMemcpyDtoHAsync", "memory"); + subst("cuMemcpyDtoH_v2", "hipMemcpyDtoH", "memory"); + subst("cuMemcpyHtoA", "hipMemcpyHtoA", "memory"); + subst("cuMemcpyHtoA_v2", "hipMemcpyHtoA", "memory"); + subst("cuMemcpyHtoD", "hipMemcpyHtoD", "memory"); + subst("cuMemcpyHtoDAsync", "hipMemcpyHtoDAsync", "memory"); + subst("cuMemcpyHtoDAsync_v2", "hipMemcpyHtoDAsync", "memory"); + subst("cuMemcpyHtoD_v2", "hipMemcpyHtoD", "memory"); + subst("cuMemsetD16", "hipMemsetD16", "memory"); + subst("cuMemsetD16Async", "hipMemsetD16Async", "memory"); + subst("cuMemsetD16_v2", "hipMemsetD16", "memory"); + subst("cuMemsetD32", "hipMemsetD32", "memory"); + subst("cuMemsetD32Async", "hipMemsetD32Async", "memory"); + subst("cuMemsetD32_v2", "hipMemsetD32", "memory"); + subst("cuMemsetD8", "hipMemsetD8", "memory"); + subst("cuMemsetD8Async", "hipMemsetD8Async", "memory"); + subst("cuMemsetD8_v2", "hipMemsetD8", "memory"); + subst("cuMipmappedArrayCreate", "hipMipmappedArrayCreate", "memory"); + subst("cuMipmappedArrayDestroy", "hipMipmappedArrayDestroy", "memory"); + subst("cuMipmappedArrayGetLevel", "hipMipmappedArrayGetLevel", "memory"); + subst("cudaArrayGetInfo", "hipArrayGetInfo", "memory"); + subst("cudaFree", "hipFree", "memory"); + subst("cudaFreeArray", "hipFreeArray", "memory"); + subst("cudaFreeAsync", "hipFreeAsync", "memory"); + subst("cudaFreeHost", "hipHostFree", "memory"); + subst("cudaFreeMipmappedArray", "hipFreeMipmappedArray", "memory"); + subst("cudaGetMipmappedArrayLevel", "hipGetMipmappedArrayLevel", "memory"); + subst("cudaGetSymbolAddress", "hipGetSymbolAddress", "memory"); + subst("cudaGetSymbolSize", "hipGetSymbolSize", "memory"); + subst("cudaHostAlloc", "hipHostAlloc", "memory"); + subst("cudaHostGetDevicePointer", "hipHostGetDevicePointer", "memory"); + subst("cudaHostGetFlags", "hipHostGetFlags", "memory"); + subst("cudaHostRegister", "hipHostRegister", "memory"); + subst("cudaHostUnregister", "hipHostUnregister", "memory"); + subst("cudaMalloc", "hipMalloc", "memory"); + subst("cudaMalloc3D", "hipMalloc3D", "memory"); + subst("cudaMalloc3DArray", "hipMalloc3DArray", "memory"); + subst("cudaMallocArray", "hipMallocArray", "memory"); + subst("cudaMallocAsync", "hipMallocAsync", "memory"); + subst("cudaMallocFromPoolAsync", "hipMallocFromPoolAsync", "memory"); + 
subst("cudaMallocHost", "hipHostMalloc", "memory"); + subst("cudaMallocManaged", "hipMallocManaged", "memory"); + subst("cudaMallocMipmappedArray", "hipMallocMipmappedArray", "memory"); + subst("cudaMallocPitch", "hipMallocPitch", "memory"); + subst("cudaMemAdvise", "hipMemAdvise", "memory"); + subst("cudaMemGetInfo", "hipMemGetInfo", "memory"); + subst("cudaMemPoolCreate", "hipMemPoolCreate", "memory"); + subst("cudaMemPoolDestroy", "hipMemPoolDestroy", "memory"); + subst("cudaMemPoolExportPointer", "hipMemPoolExportPointer", "memory"); + subst("cudaMemPoolExportToShareableHandle", "hipMemPoolExportToShareableHandle", "memory"); + subst("cudaMemPoolGetAccess", "hipMemPoolGetAccess", "memory"); + subst("cudaMemPoolGetAttribute", "hipMemPoolGetAttribute", "memory"); + subst("cudaMemPoolImportFromShareableHandle", "hipMemPoolImportFromShareableHandle", "memory"); + subst("cudaMemPoolImportPointer", "hipMemPoolImportPointer", "memory"); + subst("cudaMemPoolSetAccess", "hipMemPoolSetAccess", "memory"); + subst("cudaMemPoolSetAttribute", "hipMemPoolSetAttribute", "memory"); + subst("cudaMemPoolTrimTo", "hipMemPoolTrimTo", "memory"); + subst("cudaMemPrefetchAsync", "hipMemPrefetchAsync", "memory"); + subst("cudaMemRangeGetAttribute", "hipMemRangeGetAttribute", "memory"); + subst("cudaMemRangeGetAttributes", "hipMemRangeGetAttributes", "memory"); + subst("cudaMemcpy", "hipMemcpy", "memory"); + subst("cudaMemcpy2D", "hipMemcpy2D", "memory"); + subst("cudaMemcpy2DAsync", "hipMemcpy2DAsync", "memory"); + subst("cudaMemcpy2DFromArray", "hipMemcpy2DFromArray", "memory"); + subst("cudaMemcpy2DFromArrayAsync", "hipMemcpy2DFromArrayAsync", "memory"); + subst("cudaMemcpy2DToArray", "hipMemcpy2DToArray", "memory"); + subst("cudaMemcpy2DToArrayAsync", "hipMemcpy2DToArrayAsync", "memory"); + subst("cudaMemcpy3D", "hipMemcpy3D", "memory"); + subst("cudaMemcpy3DAsync", "hipMemcpy3DAsync", "memory"); + subst("cudaMemcpyAsync", "hipMemcpyAsync", "memory"); + subst("cudaMemcpyFromArray", "hipMemcpyFromArray", "memory"); + subst("cudaMemcpyFromSymbol", "hipMemcpyFromSymbol", "memory"); + subst("cudaMemcpyFromSymbolAsync", "hipMemcpyFromSymbolAsync", "memory"); + subst("cudaMemcpyPeer", "hipMemcpyPeer", "memory"); + subst("cudaMemcpyPeerAsync", "hipMemcpyPeerAsync", "memory"); + subst("cudaMemcpyToArray", "hipMemcpyToArray", "memory"); + subst("cudaMemcpyToSymbol", "hipMemcpyToSymbol", "memory"); + subst("cudaMemcpyToSymbolAsync", "hipMemcpyToSymbolAsync", "memory"); + subst("cudaMemset", "hipMemset", "memory"); + subst("cudaMemset2D", "hipMemset2D", "memory"); + subst("cudaMemset2DAsync", "hipMemset2DAsync", "memory"); + subst("cudaMemset3D", "hipMemset3D", "memory"); + subst("cudaMemset3DAsync", "hipMemset3DAsync", "memory"); + subst("cudaMemsetAsync", "hipMemsetAsync", "memory"); + subst("make_cudaExtent", "make_hipExtent", "memory"); + subst("make_cudaPitchedPtr", "make_hipPitchedPtr", "memory"); + subst("make_cudaPos", "make_hipPos", "memory"); + subst("cuMemAddressFree", "hipMemAddressFree", "virtual_memory"); + subst("cuMemAddressReserve", "hipMemAddressReserve", "virtual_memory"); + subst("cuMemCreate", "hipMemCreate", "virtual_memory"); + subst("cuMemExportToShareableHandle", "hipMemExportToShareableHandle", "virtual_memory"); + subst("cuMemGetAccess", "hipMemGetAccess", "virtual_memory"); + subst("cuMemGetAllocationGranularity", "hipMemGetAllocationGranularity", "virtual_memory"); + subst("cuMemGetAllocationPropertiesFromHandle", "hipMemGetAllocationPropertiesFromHandle", "virtual_memory"); + 
subst("cuMemImportFromShareableHandle", "hipMemImportFromShareableHandle", "virtual_memory"); + subst("cuMemMap", "hipMemMap", "virtual_memory"); + subst("cuMemMapArrayAsync", "hipMemMapArrayAsync", "virtual_memory"); + subst("cuMemRelease", "hipMemRelease", "virtual_memory"); + subst("cuMemRetainAllocationHandle", "hipMemRetainAllocationHandle", "virtual_memory"); + subst("cuMemSetAccess", "hipMemSetAccess", "virtual_memory"); + subst("cuMemUnmap", "hipMemUnmap", "virtual_memory"); + subst("cuMemAllocAsync", "hipMallocAsync", "ordered_memory"); + subst("cuMemAllocFromPoolAsync", "hipMallocFromPoolAsync", "ordered_memory"); + subst("cuMemFreeAsync", "hipFreeAsync", "ordered_memory"); + subst("cuMemPoolCreate", "hipMemPoolCreate", "ordered_memory"); + subst("cuMemPoolDestroy", "hipMemPoolDestroy", "ordered_memory"); + subst("cuMemPoolExportPointer", "hipMemPoolExportPointer", "ordered_memory"); + subst("cuMemPoolExportToShareableHandle", "hipMemPoolExportToShareableHandle", "ordered_memory"); + subst("cuMemPoolGetAccess", "hipMemPoolGetAccess", "ordered_memory"); + subst("cuMemPoolGetAttribute", "hipMemPoolGetAttribute", "ordered_memory"); + subst("cuMemPoolImportFromShareableHandle", "hipMemPoolImportFromShareableHandle", "ordered_memory"); + subst("cuMemPoolImportPointer", "hipMemPoolImportPointer", "ordered_memory"); + subst("cuMemPoolSetAccess", "hipMemPoolSetAccess", "ordered_memory"); + subst("cuMemPoolSetAttribute", "hipMemPoolSetAttribute", "ordered_memory"); + subst("cuMemPoolTrimTo", "hipMemPoolTrimTo", "ordered_memory"); + subst("cuMemAdvise", "hipMemAdvise", "unified"); + subst("cuMemPrefetchAsync", "hipMemPrefetchAsync", "unified"); + subst("cuMemRangeGetAttribute", "hipMemRangeGetAttribute", "unified"); + subst("cuMemRangeGetAttributes", "hipMemRangeGetAttributes", "unified"); + subst("cuPointerGetAttribute", "hipPointerGetAttribute", "unified"); + subst("cuPointerGetAttributes", "hipDrvPointerGetAttributes", "unified"); + subst("cuPointerSetAttribute", "hipPointerSetAttribute", "unified"); + subst("cudaPointerGetAttributes", "hipPointerGetAttributes", "unified"); + subst("cuStreamAddCallback", "hipStreamAddCallback", "stream"); + subst("cuStreamAttachMemAsync", "hipStreamAttachMemAsync", "stream"); + subst("cuStreamBeginCapture", "hipStreamBeginCapture", "stream"); + subst("cuStreamBeginCapture_v2", "hipStreamBeginCapture", "stream"); + subst("cuStreamCreate", "hipStreamCreateWithFlags", "stream"); + subst("cuStreamCreateWithPriority", "hipStreamCreateWithPriority", "stream"); + subst("cuStreamDestroy", "hipStreamDestroy", "stream"); + subst("cuStreamDestroy_v2", "hipStreamDestroy", "stream"); + subst("cuStreamEndCapture", "hipStreamEndCapture", "stream"); + subst("cuStreamGetCaptureInfo", "hipStreamGetCaptureInfo", "stream"); + subst("cuStreamGetCaptureInfo_v2", "hipStreamGetCaptureInfo_v2", "stream"); + subst("cuStreamGetFlags", "hipStreamGetFlags", "stream"); + subst("cuStreamGetPriority", "hipStreamGetPriority", "stream"); + subst("cuStreamIsCapturing", "hipStreamIsCapturing", "stream"); + subst("cuStreamQuery", "hipStreamQuery", "stream"); + subst("cuStreamSynchronize", "hipStreamSynchronize", "stream"); + subst("cuStreamUpdateCaptureDependencies", "hipStreamUpdateCaptureDependencies", "stream"); + subst("cuStreamWaitEvent", "hipStreamWaitEvent", "stream"); + subst("cuThreadExchangeStreamCaptureMode", "hipThreadExchangeStreamCaptureMode", "stream"); + subst("cudaStreamAddCallback", "hipStreamAddCallback", "stream"); + subst("cudaStreamAttachMemAsync", 
"hipStreamAttachMemAsync", "stream"); + subst("cudaStreamBeginCapture", "hipStreamBeginCapture", "stream"); + subst("cudaStreamCreate", "hipStreamCreate", "stream"); + subst("cudaStreamCreateWithFlags", "hipStreamCreateWithFlags", "stream"); + subst("cudaStreamCreateWithPriority", "hipStreamCreateWithPriority", "stream"); + subst("cudaStreamDestroy", "hipStreamDestroy", "stream"); + subst("cudaStreamEndCapture", "hipStreamEndCapture", "stream"); + subst("cudaStreamGetCaptureInfo", "hipStreamGetCaptureInfo", "stream"); + subst("cudaStreamGetFlags", "hipStreamGetFlags", "stream"); + subst("cudaStreamGetPriority", "hipStreamGetPriority", "stream"); + subst("cudaStreamIsCapturing", "hipStreamIsCapturing", "stream"); + subst("cudaStreamQuery", "hipStreamQuery", "stream"); + subst("cudaStreamSynchronize", "hipStreamSynchronize", "stream"); + subst("cudaStreamUpdateCaptureDependencies", "hipStreamUpdateCaptureDependencies", "stream"); + subst("cudaStreamWaitEvent", "hipStreamWaitEvent", "stream"); + subst("cudaThreadExchangeStreamCaptureMode", "hipThreadExchangeStreamCaptureMode", "stream"); + subst("cuEventCreate", "hipEventCreateWithFlags", "event"); + subst("cuEventDestroy", "hipEventDestroy", "event"); + subst("cuEventDestroy_v2", "hipEventDestroy", "event"); + subst("cuEventElapsedTime", "hipEventElapsedTime", "event"); + subst("cuEventQuery", "hipEventQuery", "event"); + subst("cuEventRecord", "hipEventRecord", "event"); + subst("cuEventSynchronize", "hipEventSynchronize", "event"); + subst("cudaEventCreate", "hipEventCreate", "event"); + subst("cudaEventCreateWithFlags", "hipEventCreateWithFlags", "event"); + subst("cudaEventDestroy", "hipEventDestroy", "event"); + subst("cudaEventElapsedTime", "hipEventElapsedTime", "event"); + subst("cudaEventQuery", "hipEventQuery", "event"); + subst("cudaEventRecord", "hipEventRecord", "event"); + subst("cudaEventSynchronize", "hipEventSynchronize", "event"); + subst("cuDestroyExternalMemory", "hipDestroyExternalMemory", "external_resource"); + subst("cuDestroyExternalSemaphore", "hipDestroyExternalSemaphore", "external_resource"); + subst("cuExternalMemoryGetMappedBuffer", "hipExternalMemoryGetMappedBuffer", "external_resource"); + subst("cuImportExternalMemory", "hipImportExternalMemory", "external_resource"); + subst("cuImportExternalSemaphore", "hipImportExternalSemaphore", "external_resource"); + subst("cuSignalExternalSemaphoresAsync", "hipSignalExternalSemaphoresAsync", "external_resource"); + subst("cuWaitExternalSemaphoresAsync", "hipWaitExternalSemaphoresAsync", "external_resource"); + subst("cudaDestroyExternalMemory", "hipDestroyExternalMemory", "external_resource"); + subst("cudaDestroyExternalSemaphore", "hipDestroyExternalSemaphore", "external_resource"); + subst("cudaExternalMemoryGetMappedBuffer", "hipExternalMemoryGetMappedBuffer", "external_resource"); + subst("cudaImportExternalMemory", "hipImportExternalMemory", "external_resource"); + subst("cudaImportExternalSemaphore", "hipImportExternalSemaphore", "external_resource"); + subst("cudaSignalExternalSemaphoresAsync", "hipSignalExternalSemaphoresAsync", "external_resource"); + subst("cudaWaitExternalSemaphoresAsync", "hipWaitExternalSemaphoresAsync", "external_resource"); + subst("cuStreamWaitValue32", "hipStreamWaitValue32", "stream_memory"); + subst("cuStreamWaitValue32_v2", "hipStreamWaitValue32", "stream_memory"); + subst("cuStreamWaitValue64", "hipStreamWaitValue64", "stream_memory"); + subst("cuStreamWaitValue64_v2", "hipStreamWaitValue64", "stream_memory"); + 
subst("cuStreamWriteValue32", "hipStreamWriteValue32", "stream_memory"); + subst("cuStreamWriteValue32_v2", "hipStreamWriteValue32", "stream_memory"); + subst("cuStreamWriteValue64", "hipStreamWriteValue64", "stream_memory"); + subst("cuStreamWriteValue64_v2", "hipStreamWriteValue64", "stream_memory"); + subst("cuFuncGetAttribute", "hipFuncGetAttribute", "execution"); + subst("cuLaunchCooperativeKernel", "hipModuleLaunchCooperativeKernel", "execution"); + subst("cuLaunchCooperativeKernelMultiDevice", "hipModuleLaunchCooperativeKernelMultiDevice", "execution"); + subst("cuLaunchHostFunc", "hipLaunchHostFunc", "execution"); + subst("cuLaunchKernel", "hipModuleLaunchKernel", "execution"); + subst("cudaConfigureCall", "hipConfigureCall", "execution"); + subst("cudaFuncGetAttributes", "hipFuncGetAttributes", "execution"); + subst("cudaFuncSetAttribute", "hipFuncSetAttribute", "execution"); + subst("cudaFuncSetCacheConfig", "hipFuncSetCacheConfig", "execution"); + subst("cudaFuncSetSharedMemConfig", "hipFuncSetSharedMemConfig", "execution"); + subst("cudaLaunch", "hipLaunchByPtr", "execution"); + subst("cudaLaunchCooperativeKernel", "hipLaunchCooperativeKernel", "execution"); + subst("cudaLaunchCooperativeKernelMultiDevice", "hipLaunchCooperativeKernelMultiDevice", "execution"); + subst("cudaLaunchHostFunc", "hipLaunchHostFunc", "execution"); + subst("cudaLaunchKernel", "hipLaunchKernel", "execution"); + subst("cudaSetupArgument", "hipSetupArgument", "execution"); + subst("cuDeviceGetGraphMemAttribute", "hipDeviceGetGraphMemAttribute", "graph"); + subst("cuDeviceGraphMemTrim", "hipDeviceGraphMemTrim", "graph"); + subst("cuDeviceSetGraphMemAttribute", "hipDeviceSetGraphMemAttribute", "graph"); + subst("cuGraphAddBatchMemOpNode", "hipGraphAddBatchMemOpNode", "graph"); + subst("cuGraphAddChildGraphNode", "hipGraphAddChildGraphNode", "graph"); + subst("cuGraphAddDependencies", "hipGraphAddDependencies", "graph"); + subst("cuGraphAddEmptyNode", "hipGraphAddEmptyNode", "graph"); + subst("cuGraphAddEventRecordNode", "hipGraphAddEventRecordNode", "graph"); + subst("cuGraphAddEventWaitNode", "hipGraphAddEventWaitNode", "graph"); + subst("cuGraphAddExternalSemaphoresSignalNode", "hipGraphAddExternalSemaphoresSignalNode", "graph"); + subst("cuGraphAddExternalSemaphoresWaitNode", "hipGraphAddExternalSemaphoresWaitNode", "graph"); + subst("cuGraphAddHostNode", "hipGraphAddHostNode", "graph"); + subst("cuGraphAddKernelNode", "hipGraphAddKernelNode", "graph"); + subst("cuGraphAddMemAllocNode", "hipGraphAddMemAllocNode", "graph"); + subst("cuGraphAddMemcpyNode", "hipDrvGraphAddMemcpyNode", "graph"); + subst("cuGraphAddMemsetNode", "hipDrvGraphAddMemsetNode", "graph"); + subst("cuGraphBatchMemOpNodeGetParams", "hipGraphBatchMemOpNodeGetParams", "graph"); + subst("cuGraphBatchMemOpNodeSetParams", "hipGraphBatchMemOpNodeSetParams", "graph"); + subst("cuGraphChildGraphNodeGetGraph", "hipGraphChildGraphNodeGetGraph", "graph"); + subst("cuGraphClone", "hipGraphClone", "graph"); + subst("cuGraphCreate", "hipGraphCreate", "graph"); + subst("cuGraphDebugDotPrint", "hipGraphDebugDotPrint", "graph"); + subst("cuGraphDestroy", "hipGraphDestroy", "graph"); + subst("cuGraphDestroyNode", "hipGraphDestroyNode", "graph"); + subst("cuGraphEventRecordNodeGetEvent", "hipGraphEventRecordNodeGetEvent", "graph"); + subst("cuGraphEventRecordNodeSetEvent", "hipGraphEventRecordNodeSetEvent", "graph"); + subst("cuGraphEventWaitNodeGetEvent", "hipGraphEventWaitNodeGetEvent", "graph"); + subst("cuGraphEventWaitNodeSetEvent", 
"hipGraphEventWaitNodeSetEvent", "graph"); + subst("cuGraphExecBatchMemOpNodeSetParams", "hipGraphExecBatchMemOpNodeSetParams", "graph"); + subst("cuGraphExecChildGraphNodeSetParams", "hipGraphExecChildGraphNodeSetParams", "graph"); + subst("cuGraphExecDestroy", "hipGraphExecDestroy", "graph"); + subst("cuGraphExecEventRecordNodeSetEvent", "hipGraphExecEventRecordNodeSetEvent", "graph"); + subst("cuGraphExecEventWaitNodeSetEvent", "hipGraphExecEventWaitNodeSetEvent", "graph"); + subst("cuGraphExecExternalSemaphoresSignalNodeSetParams", "hipGraphExecExternalSemaphoresSignalNodeSetParams", "graph"); + subst("cuGraphExecExternalSemaphoresWaitNodeSetParams", "hipGraphExecExternalSemaphoresWaitNodeSetParams", "graph"); + subst("cuGraphExecHostNodeSetParams", "hipGraphExecHostNodeSetParams", "graph"); + subst("cuGraphExecKernelNodeSetParams", "hipGraphExecKernelNodeSetParams", "graph"); + subst("cuGraphExecUpdate", "hipGraphExecUpdate", "graph"); + subst("cuGraphExternalSemaphoresSignalNodeGetParams", "hipGraphExternalSemaphoresSignalNodeGetParams", "graph"); + subst("cuGraphExternalSemaphoresSignalNodeSetParams", "hipGraphExternalSemaphoresSignalNodeSetParams", "graph"); + subst("cuGraphExternalSemaphoresWaitNodeGetParams", "hipGraphExternalSemaphoresWaitNodeGetParams", "graph"); + subst("cuGraphExternalSemaphoresWaitNodeSetParams", "hipGraphExternalSemaphoresWaitNodeSetParams", "graph"); + subst("cuGraphGetEdges", "hipGraphGetEdges", "graph"); + subst("cuGraphGetNodes", "hipGraphGetNodes", "graph"); + subst("cuGraphGetRootNodes", "hipGraphGetRootNodes", "graph"); + subst("cuGraphHostNodeGetParams", "hipGraphHostNodeGetParams", "graph"); + subst("cuGraphHostNodeSetParams", "hipGraphHostNodeSetParams", "graph"); + subst("cuGraphInstantiate", "hipGraphInstantiate", "graph"); + subst("cuGraphInstantiateWithFlags", "hipGraphInstantiateWithFlags", "graph"); + subst("cuGraphInstantiate_v2", "hipGraphInstantiate", "graph"); + subst("cuGraphKernelNodeCopyAttributes", "hipGraphKernelNodeCopyAttributes", "graph"); + subst("cuGraphKernelNodeGetAttribute", "hipGraphKernelNodeGetAttribute", "graph"); + subst("cuGraphKernelNodeGetParams", "hipGraphKernelNodeGetParams", "graph"); + subst("cuGraphKernelNodeSetAttribute", "hipGraphKernelNodeSetAttribute", "graph"); + subst("cuGraphKernelNodeSetParams", "hipGraphKernelNodeSetParams", "graph"); + subst("cuGraphLaunch", "hipGraphLaunch", "graph"); + subst("cuGraphMemAllocNodeGetParams", "hipGraphMemAllocNodeGetParams", "graph"); + subst("cuGraphMemFreeNodeGetParams", "hipGraphMemFreeNodeGetParams", "graph"); + subst("cuGraphMemsetNodeGetParams", "hipGraphMemsetNodeGetParams", "graph"); + subst("cuGraphMemsetNodeSetParams", "hipGraphMemsetNodeSetParams", "graph"); + subst("cuGraphNodeFindInClone", "hipGraphNodeFindInClone", "graph"); + subst("cuGraphNodeGetDependencies", "hipGraphNodeGetDependencies", "graph"); + subst("cuGraphNodeGetDependentNodes", "hipGraphNodeGetDependentNodes", "graph"); + subst("cuGraphNodeGetEnabled", "hipGraphNodeGetEnabled", "graph"); + subst("cuGraphNodeGetType", "hipGraphNodeGetType", "graph"); + subst("cuGraphNodeSetEnabled", "hipGraphNodeSetEnabled", "graph"); + subst("cuGraphReleaseUserObject", "hipGraphReleaseUserObject", "graph"); + subst("cuGraphRemoveDependencies", "hipGraphRemoveDependencies", "graph"); + subst("cuGraphRetainUserObject", "hipGraphRetainUserObject", "graph"); + subst("cuGraphUpload", "hipGraphUpload", "graph"); + subst("cuUserObjectCreate", "hipUserObjectCreate", "graph"); + subst("cuUserObjectRelease", 
"hipUserObjectRelease", "graph"); + subst("cuUserObjectRetain", "hipUserObjectRetain", "graph"); + subst("cudaDeviceGetGraphMemAttribute", "hipDeviceGetGraphMemAttribute", "graph"); + subst("cudaDeviceGraphMemTrim", "hipDeviceGraphMemTrim", "graph"); + subst("cudaDeviceSetGraphMemAttribute", "hipDeviceSetGraphMemAttribute", "graph"); + subst("cudaGraphAddChildGraphNode", "hipGraphAddChildGraphNode", "graph"); + subst("cudaGraphAddDependencies", "hipGraphAddDependencies", "graph"); + subst("cudaGraphAddEmptyNode", "hipGraphAddEmptyNode", "graph"); + subst("cudaGraphAddEventRecordNode", "hipGraphAddEventRecordNode", "graph"); + subst("cudaGraphAddEventWaitNode", "hipGraphAddEventWaitNode", "graph"); + subst("cudaGraphAddExternalSemaphoresSignalNode", "hipGraphAddExternalSemaphoresSignalNode", "graph"); + subst("cudaGraphAddExternalSemaphoresWaitNode", "hipGraphAddExternalSemaphoresWaitNode", "graph"); + subst("cudaGraphAddHostNode", "hipGraphAddHostNode", "graph"); + subst("cudaGraphAddKernelNode", "hipGraphAddKernelNode", "graph"); + subst("cudaGraphAddMemAllocNode", "hipGraphAddMemAllocNode", "graph"); + subst("cudaGraphAddMemFreeNode", "hipGraphAddMemFreeNode", "graph"); + subst("cudaGraphAddMemcpyNode", "hipGraphAddMemcpyNode", "graph"); + subst("cudaGraphAddMemcpyNode1D", "hipGraphAddMemcpyNode1D", "graph"); + subst("cudaGraphAddMemcpyNodeFromSymbol", "hipGraphAddMemcpyNodeFromSymbol", "graph"); + subst("cudaGraphAddMemcpyNodeToSymbol", "hipGraphAddMemcpyNodeToSymbol", "graph"); + subst("cudaGraphAddMemsetNode", "hipGraphAddMemsetNode", "graph"); + subst("cudaGraphChildGraphNodeGetGraph", "hipGraphChildGraphNodeGetGraph", "graph"); + subst("cudaGraphClone", "hipGraphClone", "graph"); + subst("cudaGraphCreate", "hipGraphCreate", "graph"); + subst("cudaGraphDebugDotPrint", "hipGraphDebugDotPrint", "graph"); + subst("cudaGraphDestroy", "hipGraphDestroy", "graph"); + subst("cudaGraphDestroyNode", "hipGraphDestroyNode", "graph"); + subst("cudaGraphEventRecordNodeGetEvent", "hipGraphEventRecordNodeGetEvent", "graph"); + subst("cudaGraphEventRecordNodeSetEvent", "hipGraphEventRecordNodeSetEvent", "graph"); + subst("cudaGraphEventWaitNodeGetEvent", "hipGraphEventWaitNodeGetEvent", "graph"); + subst("cudaGraphEventWaitNodeSetEvent", "hipGraphEventWaitNodeSetEvent", "graph"); + subst("cudaGraphExecChildGraphNodeSetParams", "hipGraphExecChildGraphNodeSetParams", "graph"); + subst("cudaGraphExecDestroy", "hipGraphExecDestroy", "graph"); + subst("cudaGraphExecEventRecordNodeSetEvent", "hipGraphExecEventRecordNodeSetEvent", "graph"); + subst("cudaGraphExecEventWaitNodeSetEvent", "hipGraphExecEventWaitNodeSetEvent", "graph"); + subst("cudaGraphExecExternalSemaphoresSignalNodeSetParams", "hipGraphExecExternalSemaphoresSignalNodeSetParams", "graph"); + subst("cudaGraphExecExternalSemaphoresWaitNodeSetParams", "hipGraphExecExternalSemaphoresWaitNodeSetParams", "graph"); + subst("cudaGraphExecHostNodeSetParams", "hipGraphExecHostNodeSetParams", "graph"); + subst("cudaGraphExecKernelNodeSetParams", "hipGraphExecKernelNodeSetParams", "graph"); + subst("cudaGraphExecMemcpyNodeSetParams", "hipGraphExecMemcpyNodeSetParams", "graph"); + subst("cudaGraphExecMemcpyNodeSetParams1D", "hipGraphExecMemcpyNodeSetParams1D", "graph"); + subst("cudaGraphExecMemcpyNodeSetParamsFromSymbol", "hipGraphExecMemcpyNodeSetParamsFromSymbol", "graph"); + subst("cudaGraphExecMemcpyNodeSetParamsToSymbol", "hipGraphExecMemcpyNodeSetParamsToSymbol", "graph"); + subst("cudaGraphExecMemsetNodeSetParams", 
"hipGraphExecMemsetNodeSetParams", "graph"); + subst("cudaGraphExecUpdate", "hipGraphExecUpdate", "graph"); + subst("cudaGraphExternalSemaphoresSignalNodeGetParams", "hipGraphExternalSemaphoresSignalNodeGetParams", "graph"); + subst("cudaGraphExternalSemaphoresSignalNodeSetParams", "hipGraphExternalSemaphoresSignalNodeSetParams", "graph"); + subst("cudaGraphExternalSemaphoresWaitNodeGetParams", "hipGraphExternalSemaphoresWaitNodeGetParams", "graph"); + subst("cudaGraphExternalSemaphoresWaitNodeSetParams", "hipGraphExternalSemaphoresWaitNodeSetParams", "graph"); + subst("cudaGraphGetEdges", "hipGraphGetEdges", "graph"); + subst("cudaGraphGetNodes", "hipGraphGetNodes", "graph"); + subst("cudaGraphGetRootNodes", "hipGraphGetRootNodes", "graph"); + subst("cudaGraphHostNodeGetParams", "hipGraphHostNodeGetParams", "graph"); + subst("cudaGraphHostNodeSetParams", "hipGraphHostNodeSetParams", "graph"); + subst("cudaGraphInstantiate", "hipGraphInstantiate", "graph"); + subst("cudaGraphInstantiateWithFlags", "hipGraphInstantiateWithFlags", "graph"); + subst("cudaGraphKernelNodeCopyAttributes", "hipGraphKernelNodeCopyAttributes", "graph"); + subst("cudaGraphKernelNodeGetAttribute", "hipGraphKernelNodeGetAttribute", "graph"); + subst("cudaGraphKernelNodeGetParams", "hipGraphKernelNodeGetParams", "graph"); + subst("cudaGraphKernelNodeSetAttribute", "hipGraphKernelNodeSetAttribute", "graph"); + subst("cudaGraphKernelNodeSetParams", "hipGraphKernelNodeSetParams", "graph"); + subst("cudaGraphLaunch", "hipGraphLaunch", "graph"); + subst("cudaGraphMemAllocNodeGetParams", "hipGraphMemAllocNodeGetParams", "graph"); + subst("cudaGraphMemFreeNodeGetParams", "hipGraphMemFreeNodeGetParams", "graph"); + subst("cudaGraphMemcpyNodeGetParams", "hipGraphMemcpyNodeGetParams", "graph"); + subst("cudaGraphMemcpyNodeSetParams", "hipGraphMemcpyNodeSetParams", "graph"); + subst("cudaGraphMemcpyNodeSetParams1D", "hipGraphMemcpyNodeSetParams1D", "graph"); + subst("cudaGraphMemcpyNodeSetParamsFromSymbol", "hipGraphMemcpyNodeSetParamsFromSymbol", "graph"); + subst("cudaGraphMemcpyNodeSetParamsToSymbol", "hipGraphMemcpyNodeSetParamsToSymbol", "graph"); + subst("cudaGraphMemsetNodeGetParams", "hipGraphMemsetNodeGetParams", "graph"); + subst("cudaGraphMemsetNodeSetParams", "hipGraphMemsetNodeSetParams", "graph"); + subst("cudaGraphNodeFindInClone", "hipGraphNodeFindInClone", "graph"); + subst("cudaGraphNodeGetDependencies", "hipGraphNodeGetDependencies", "graph"); + subst("cudaGraphNodeGetDependentNodes", "hipGraphNodeGetDependentNodes", "graph"); + subst("cudaGraphNodeGetEnabled", "hipGraphNodeGetEnabled", "graph"); + subst("cudaGraphNodeGetType", "hipGraphNodeGetType", "graph"); + subst("cudaGraphNodeSetEnabled", "hipGraphNodeSetEnabled", "graph"); + subst("cudaGraphReleaseUserObject", "hipGraphReleaseUserObject", "graph"); + subst("cudaGraphRemoveDependencies", "hipGraphRemoveDependencies", "graph"); + subst("cudaGraphRetainUserObject", "hipGraphRetainUserObject", "graph"); + subst("cudaGraphUpload", "hipGraphUpload", "graph"); + subst("cudaUserObjectCreate", "hipUserObjectCreate", "graph"); + subst("cudaUserObjectRelease", "hipUserObjectRelease", "graph"); + subst("cudaUserObjectRetain", "hipUserObjectRetain", "graph"); + subst("cuOccupancyMaxActiveBlocksPerMultiprocessor", "hipModuleOccupancyMaxActiveBlocksPerMultiprocessor", "occupancy"); + subst("cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags", "hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags", "occupancy"); + subst("cuOccupancyMaxPotentialBlockSize", 
"hipModuleOccupancyMaxPotentialBlockSize", "occupancy"); + subst("cuOccupancyMaxPotentialBlockSizeWithFlags", "hipModuleOccupancyMaxPotentialBlockSizeWithFlags", "occupancy"); + subst("cudaOccupancyMaxActiveBlocksPerMultiprocessor", "hipOccupancyMaxActiveBlocksPerMultiprocessor", "occupancy"); + subst("cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags", "hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags", "occupancy"); + subst("cudaOccupancyMaxPotentialBlockSize", "hipOccupancyMaxPotentialBlockSize", "occupancy"); + subst("cudaOccupancyMaxPotentialBlockSizeVariableSMem", "hipOccupancyMaxPotentialBlockSizeVariableSMem", "occupancy"); + subst("cudaOccupancyMaxPotentialBlockSizeVariableSMemWithFlags", "hipOccupancyMaxPotentialBlockSizeVariableSMemWithFlags", "occupancy"); + subst("cudaOccupancyMaxPotentialBlockSizeWithFlags", "hipOccupancyMaxPotentialBlockSizeWithFlags", "occupancy"); + subst("cuTexObjectCreate", "hipTexObjectCreate", "texture"); + subst("cuTexObjectDestroy", "hipTexObjectDestroy", "texture"); + subst("cuTexObjectGetResourceDesc", "hipTexObjectGetResourceDesc", "texture"); + subst("cuTexObjectGetResourceViewDesc", "hipTexObjectGetResourceViewDesc", "texture"); + subst("cuTexObjectGetTextureDesc", "hipTexObjectGetTextureDesc", "texture"); + subst("cuTexRefGetAddress", "hipTexRefGetAddress", "texture"); + subst("cuTexRefGetAddressMode", "hipTexRefGetAddressMode", "texture"); + subst("cuTexRefGetAddress_v2", "hipTexRefGetAddress", "texture"); + subst("cuTexRefGetArray", "hipTexRefGetArray", "texture"); + subst("cuTexRefGetBorderColor", "hipTexRefGetBorderColor", "texture"); + subst("cuTexRefGetFilterMode", "hipTexRefGetFilterMode", "texture"); + subst("cuTexRefGetFlags", "hipTexRefGetFlags", "texture"); + subst("cuTexRefGetFormat", "hipTexRefGetFormat", "texture"); + subst("cuTexRefGetMaxAnisotropy", "hipTexRefGetMaxAnisotropy", "texture"); + subst("cuTexRefGetMipmapFilterMode", "hipTexRefGetMipmapFilterMode", "texture"); + subst("cuTexRefGetMipmapLevelBias", "hipTexRefGetMipmapLevelBias", "texture"); + subst("cuTexRefGetMipmapLevelClamp", "hipTexRefGetMipmapLevelClamp", "texture"); + subst("cuTexRefGetMipmappedArray", "hipTexRefGetMipMappedArray", "texture"); + subst("cuTexRefSetAddress", "hipTexRefSetAddress", "texture"); + subst("cuTexRefSetAddress2D", "hipTexRefSetAddress2D", "texture"); + subst("cuTexRefSetAddress2D_v2", "hipTexRefSetAddress2D", "texture"); + subst("cuTexRefSetAddress2D_v3", "hipTexRefSetAddress2D", "texture"); + subst("cuTexRefSetAddressMode", "hipTexRefSetAddressMode", "texture"); + subst("cuTexRefSetAddress_v2", "hipTexRefSetAddress", "texture"); + subst("cuTexRefSetArray", "hipTexRefSetArray", "texture"); + subst("cuTexRefSetBorderColor", "hipTexRefSetBorderColor", "texture"); + subst("cuTexRefSetFilterMode", "hipTexRefSetFilterMode", "texture"); + subst("cuTexRefSetFlags", "hipTexRefSetFlags", "texture"); + subst("cuTexRefSetFormat", "hipTexRefSetFormat", "texture"); + subst("cuTexRefSetMaxAnisotropy", "hipTexRefSetMaxAnisotropy", "texture"); + subst("cuTexRefSetMipmapFilterMode", "hipTexRefSetMipmapFilterMode", "texture"); + subst("cuTexRefSetMipmapLevelBias", "hipTexRefSetMipmapLevelBias", "texture"); + subst("cuTexRefSetMipmapLevelClamp", "hipTexRefSetMipmapLevelClamp", "texture"); + subst("cuTexRefSetMipmappedArray", "hipTexRefSetMipmappedArray", "texture"); + subst("cudaBindTexture", "hipBindTexture", "texture"); + subst("cudaBindTexture2D", "hipBindTexture2D", "texture"); + subst("cudaBindTextureToArray", "hipBindTextureToArray", 
"texture"); + subst("cudaBindTextureToMipmappedArray", "hipBindTextureToMipmappedArray", "texture"); + subst("cudaCreateChannelDesc", "hipCreateChannelDesc", "texture"); + subst("cudaCreateTextureObject", "hipCreateTextureObject", "texture"); + subst("cudaDestroyTextureObject", "hipDestroyTextureObject", "texture"); + subst("cudaGetChannelDesc", "hipGetChannelDesc", "texture"); + subst("cudaGetTextureAlignmentOffset", "hipGetTextureAlignmentOffset", "texture"); + subst("cudaGetTextureObjectResourceDesc", "hipGetTextureObjectResourceDesc", "texture"); + subst("cudaGetTextureObjectResourceViewDesc", "hipGetTextureObjectResourceViewDesc", "texture"); + subst("cudaGetTextureObjectTextureDesc", "hipGetTextureObjectTextureDesc", "texture"); + subst("cudaGetTextureReference", "hipGetTextureReference", "texture"); + subst("cudaUnbindTexture", "hipUnbindTexture", "texture"); + subst("cudaCreateSurfaceObject", "hipCreateSurfaceObject", "surface"); + subst("cudaDestroySurfaceObject", "hipDestroySurfaceObject", "surface"); + subst("cuCtxDisablePeerAccess", "hipCtxDisablePeerAccess", "peer"); + subst("cuCtxEnablePeerAccess", "hipCtxEnablePeerAccess", "peer"); + subst("cuDeviceCanAccessPeer", "hipDeviceCanAccessPeer", "peer"); + subst("cuDeviceGetP2PAttribute", "hipDeviceGetP2PAttribute", "peer"); + subst("cudaDeviceCanAccessPeer", "hipDeviceCanAccessPeer", "peer"); + subst("cudaDeviceDisablePeerAccess", "hipDeviceDisablePeerAccess", "peer"); + subst("cudaDeviceEnablePeerAccess", "hipDeviceEnablePeerAccess", "peer"); + subst("cuGraphicsMapResources", "hipGraphicsMapResources", "graphics"); + subst("cuGraphicsResourceGetMappedPointer", "hipGraphicsResourceGetMappedPointer", "graphics"); + subst("cuGraphicsResourceGetMappedPointer_v2", "hipGraphicsResourceGetMappedPointer", "graphics"); + subst("cuGraphicsSubResourceGetMappedArray", "hipGraphicsSubResourceGetMappedArray", "graphics"); + subst("cuGraphicsUnmapResources", "hipGraphicsUnmapResources", "graphics"); + subst("cuGraphicsUnregisterResource", "hipGraphicsUnregisterResource", "graphics"); + subst("cudaGraphicsMapResources", "hipGraphicsMapResources", "graphics"); + subst("cudaGraphicsResourceGetMappedPointer", "hipGraphicsResourceGetMappedPointer", "graphics"); + subst("cudaGraphicsSubResourceGetMappedArray", "hipGraphicsSubResourceGetMappedArray", "graphics"); + subst("cudaGraphicsUnmapResources", "hipGraphicsUnmapResources", "graphics"); + subst("cudaGraphicsUnregisterResource", "hipGraphicsUnregisterResource", "graphics"); + subst("cuProfilerStart", "hipProfilerStart", "profiler"); + subst("cuProfilerStop", "hipProfilerStop", "profiler"); + subst("cudaProfilerStart", "hipProfilerStart", "profiler"); + subst("cudaProfilerStop", "hipProfilerStop", "profiler"); + subst("cuGLGetDevices", "hipGLGetDevices", "openGL"); + subst("cuGraphicsGLRegisterBuffer", "hipGraphicsGLRegisterBuffer", "openGL"); + subst("cuGraphicsGLRegisterImage", "hipGraphicsGLRegisterImage", "openGL"); + subst("cudaGLGetDevices", "hipGLGetDevices", "openGL"); + subst("cudaGraphicsGLRegisterBuffer", "hipGraphicsGLRegisterBuffer", "openGL"); + subst("cudaGraphicsGLRegisterImage", "hipGraphicsGLRegisterImage", "openGL"); + subst("cudaThreadExit", "hipDeviceReset", "thread"); + subst("cudaThreadGetCacheConfig", "hipDeviceGetCacheConfig", "thread"); + subst("cudaThreadSetCacheConfig", "hipDeviceSetCacheConfig", "thread"); + subst("cudaThreadSynchronize", "hipDeviceSynchronize", "thread"); + subst("cuCabs", "hipCabs", "complex"); + subst("cuCabsf", "hipCabsf", "complex"); + 
subst("cuCadd", "hipCadd", "complex"); + subst("cuCaddf", "hipCaddf", "complex"); + subst("cuCdiv", "hipCdiv", "complex"); + subst("cuCdivf", "hipCdivf", "complex"); + subst("cuCfma", "hipCfma", "complex"); + subst("cuCfmaf", "hipCfmaf", "complex"); + subst("cuCimag", "hipCimag", "complex"); + subst("cuCimagf", "hipCimagf", "complex"); + subst("cuCmul", "hipCmul", "complex"); + subst("cuCmulf", "hipCmulf", "complex"); + subst("cuComplexDoubleToFloat", "hipComplexDoubleToFloat", "complex"); + subst("cuComplexFloatToDouble", "hipComplexFloatToDouble", "complex"); + subst("cuConj", "hipConj", "complex"); + subst("cuConjf", "hipConjf", "complex"); + subst("cuCreal", "hipCreal", "complex"); + subst("cuCrealf", "hipCrealf", "complex"); + subst("cuCsub", "hipCsub", "complex"); + subst("cuCsubf", "hipCsubf", "complex"); + subst("make_cuComplex", "make_hipComplex", "complex"); + subst("make_cuDoubleComplex", "make_hipDoubleComplex", "complex"); + subst("make_cuFloatComplex", "make_hipFloatComplex", "complex"); + subst("cublasAxpyEx", "hipblasAxpyEx_v2", "library"); + subst("cublasCaxpy", "hipblasCaxpy_v2", "library"); + subst("cublasCaxpy_64", "hipblasCaxpy_v2_64", "library"); + subst("cublasCaxpy_v2", "hipblasCaxpy_v2", "library"); + subst("cublasCaxpy_v2_64", "hipblasCaxpy_v2_64", "library"); + subst("cublasCcopy", "hipblasCcopy_v2", "library"); + subst("cublasCcopy_64", "hipblasCcopy_v2_64", "library"); + subst("cublasCcopy_v2", "hipblasCcopy_v2", "library"); + subst("cublasCcopy_v2_64", "hipblasCcopy_v2_64", "library"); + subst("cublasCdgmm", "hipblasCdgmm_v2", "library"); + subst("cublasCdotc", "hipblasCdotc_v2", "library"); + subst("cublasCdotc_64", "hipblasCdotc_v2_64", "library"); + subst("cublasCdotc_v2", "hipblasCdotc_v2", "library"); + subst("cublasCdotc_v2_64", "hipblasCdotc_v2_64", "library"); + subst("cublasCdotu", "hipblasCdotu_v2", "library"); + subst("cublasCdotu_64", "hipblasCdotu_v2_64", "library"); + subst("cublasCdotu_v2", "hipblasCdotu_v2", "library"); + subst("cublasCdotu_v2_64", "hipblasCdotu_v2_64", "library"); + subst("cublasCgbmv", "hipblasCgbmv_v2", "library"); + subst("cublasCgbmv_v2", "hipblasCgbmv_v2", "library"); + subst("cublasCgeam", "hipblasCgeam_v2", "library"); + subst("cublasCgelsBatched", "hipblasCgelsBatched_v2", "library"); + subst("cublasCgemm", "hipblasCgemm_v2", "library"); + subst("cublasCgemmBatched", "hipblasCgemmBatched_v2", "library"); + subst("cublasCgemmStridedBatched", "hipblasCgemmStridedBatched_v2", "library"); + subst("cublasCgemm_v2", "hipblasCgemm_v2", "library"); + subst("cublasCgemv", "hipblasCgemv_v2", "library"); + subst("cublasCgemvBatched", "hipblasCgemvBatched_v2", "library"); + subst("cublasCgemvStridedBatched", "hipblasCgemvStridedBatched_v2", "library"); + subst("cublasCgemv_v2", "hipblasCgemv_v2", "library"); + subst("cublasCgeqrfBatched", "hipblasCgeqrfBatched_v2", "library"); + subst("cublasCgerc", "hipblasCgerc_v2", "library"); + subst("cublasCgerc_v2", "hipblasCgerc_v2", "library"); + subst("cublasCgeru", "hipblasCgeru_v2", "library"); + subst("cublasCgeru_v2", "hipblasCgeru_v2", "library"); + subst("cublasCgetrfBatched", "hipblasCgetrfBatched_v2", "library"); + subst("cublasCgetriBatched", "hipblasCgetriBatched_v2", "library"); + subst("cublasCgetrsBatched", "hipblasCgetrsBatched_v2", "library"); + subst("cublasChbmv", "hipblasChbmv_v2", "library"); + subst("cublasChbmv_v2", "hipblasChbmv_v2", "library"); + subst("cublasChemm", "hipblasChemm_v2", "library"); + subst("cublasChemm_v2", "hipblasChemm_v2", "library"); + 
subst("cublasChemv", "hipblasChemv_v2", "library"); + subst("cublasChemv_v2", "hipblasChemv_v2", "library"); + subst("cublasCher", "hipblasCher_v2", "library"); + subst("cublasCher2", "hipblasCher2_v2", "library"); + subst("cublasCher2_v2", "hipblasCher2_v2", "library"); + subst("cublasCher2k", "hipblasCher2k_v2", "library"); + subst("cublasCher2k_v2", "hipblasCher2k_v2", "library"); + subst("cublasCher_v2", "hipblasCher_v2", "library"); + subst("cublasCherk", "hipblasCherk_v2", "library"); + subst("cublasCherk_v2", "hipblasCherk_v2", "library"); + subst("cublasCherkx", "hipblasCherkx_v2", "library"); + subst("cublasChpmv", "hipblasChpmv_v2", "library"); + subst("cublasChpmv_v2", "hipblasChpmv_v2", "library"); + subst("cublasChpr", "hipblasChpr_v2", "library"); + subst("cublasChpr2", "hipblasChpr2_v2", "library"); + subst("cublasChpr2_v2", "hipblasChpr2_v2", "library"); + subst("cublasChpr_v2", "hipblasChpr_v2", "library"); + subst("cublasCreate", "hipblasCreate", "library"); + subst("cublasCreate_v2", "hipblasCreate", "library"); + subst("cublasCrot", "hipblasCrot_v2", "library"); + subst("cublasCrot_64", "hipblasCrot_v2_64", "library"); + subst("cublasCrot_v2", "hipblasCrot_v2", "library"); + subst("cublasCrot_v2_64", "hipblasCrot_v2_64", "library"); + subst("cublasCrotg", "hipblasCrotg_v2", "library"); + subst("cublasCrotg_v2", "hipblasCrotg_v2", "library"); + subst("cublasCscal", "hipblasCscal_v2", "library"); + subst("cublasCscal_64", "hipblasCscal_v2_64", "library"); + subst("cublasCscal_v2", "hipblasCscal_v2", "library"); + subst("cublasCscal_v2_64", "hipblasCscal_v2_64", "library"); + subst("cublasCsrot", "hipblasCsrot_v2", "library"); + subst("cublasCsrot_64", "hipblasCsrot_v2_64", "library"); + subst("cublasCsrot_v2", "hipblasCsrot_v2", "library"); + subst("cublasCsrot_v2_64", "hipblasCsrot_v2_64", "library"); + subst("cublasCsscal", "hipblasCsscal_v2", "library"); + subst("cublasCsscal_64", "hipblasCsscal_v2_64", "library"); + subst("cublasCsscal_v2", "hipblasCsscal_v2", "library"); + subst("cublasCsscal_v2_64", "hipblasCsscal_v2_64", "library"); + subst("cublasCswap", "hipblasCswap_v2", "library"); + subst("cublasCswap_64", "hipblasCswap_v2_64", "library"); + subst("cublasCswap_v2", "hipblasCswap_v2", "library"); + subst("cublasCswap_v2_64", "hipblasCswap_v2_64", "library"); + subst("cublasCsymm", "hipblasCsymm_v2", "library"); + subst("cublasCsymm_v2", "hipblasCsymm_v2", "library"); + subst("cublasCsymv", "hipblasCsymv_v2", "library"); + subst("cublasCsymv_v2", "hipblasCsymv_v2", "library"); + subst("cublasCsyr", "hipblasCsyr_v2", "library"); + subst("cublasCsyr2", "hipblasCsyr2_v2", "library"); + subst("cublasCsyr2_v2", "hipblasCsyr2_v2", "library"); + subst("cublasCsyr2k", "hipblasCsyr2k_v2", "library"); + subst("cublasCsyr2k_v2", "hipblasCsyr2k_v2", "library"); + subst("cublasCsyr_v2", "hipblasCsyr_v2", "library"); + subst("cublasCsyrk", "hipblasCsyrk_v2", "library"); + subst("cublasCsyrk_v2", "hipblasCsyrk_v2", "library"); + subst("cublasCsyrkx", "hipblasCsyrkx_v2", "library"); + subst("cublasCtbmv", "hipblasCtbmv_v2", "library"); + subst("cublasCtbmv_v2", "hipblasCtbmv_v2", "library"); + subst("cublasCtbsv", "hipblasCtbsv_v2", "library"); + subst("cublasCtbsv_v2", "hipblasCtbsv_v2", "library"); + subst("cublasCtpmv", "hipblasCtpmv_v2", "library"); + subst("cublasCtpmv_v2", "hipblasCtpmv_v2", "library"); + subst("cublasCtpsv", "hipblasCtpsv_v2", "library"); + subst("cublasCtpsv_v2", "hipblasCtpsv_v2", "library"); + subst("cublasCtrmm", "hipblasCtrmm_v2", "library"); + 
subst("cublasCtrmm_v2", "hipblasCtrmm_v2", "library"); + subst("cublasCtrmv", "hipblasCtrmv_v2", "library"); + subst("cublasCtrmv_v2", "hipblasCtrmv_v2", "library"); + subst("cublasCtrsm", "hipblasCtrsm_v2", "library"); + subst("cublasCtrsmBatched", "hipblasCtrsmBatched_v2", "library"); + subst("cublasCtrsm_v2", "hipblasCtrsm_v2", "library"); + subst("cublasCtrsv", "hipblasCtrsv_v2", "library"); + subst("cublasCtrsv_v2", "hipblasCtrsv_v2", "library"); + subst("cublasDasum", "hipblasDasum", "library"); + subst("cublasDasum_64", "hipblasDasum_64", "library"); + subst("cublasDasum_v2", "hipblasDasum", "library"); + subst("cublasDasum_v2_64", "hipblasDasum_64", "library"); + subst("cublasDaxpy", "hipblasDaxpy", "library"); + subst("cublasDaxpy_64", "hipblasDaxpy_64", "library"); + subst("cublasDaxpy_v2", "hipblasDaxpy", "library"); + subst("cublasDaxpy_v2_64", "hipblasDaxpy_64", "library"); + subst("cublasDcopy", "hipblasDcopy", "library"); + subst("cublasDcopy_64", "hipblasDcopy_64", "library"); + subst("cublasDcopy_v2", "hipblasDcopy", "library"); + subst("cublasDcopy_v2_64", "hipblasDcopy_64", "library"); + subst("cublasDdgmm", "hipblasDdgmm", "library"); + subst("cublasDdot", "hipblasDdot", "library"); + subst("cublasDdot_64", "hipblasDdot_64", "library"); + subst("cublasDdot_v2", "hipblasDdot", "library"); + subst("cublasDdot_v2_64", "hipblasDdot_64", "library"); + subst("cublasDestroy", "hipblasDestroy", "library"); + subst("cublasDestroy_v2", "hipblasDestroy", "library"); + subst("cublasDgbmv", "hipblasDgbmv", "library"); + subst("cublasDgbmv_v2", "hipblasDgbmv", "library"); + subst("cublasDgeam", "hipblasDgeam", "library"); + subst("cublasDgelsBatched", "hipblasDgelsBatched", "library"); + subst("cublasDgemm", "hipblasDgemm", "library"); + subst("cublasDgemmBatched", "hipblasDgemmBatched", "library"); + subst("cublasDgemmStridedBatched", "hipblasDgemmStridedBatched", "library"); + subst("cublasDgemm_v2", "hipblasDgemm", "library"); + subst("cublasDgemv", "hipblasDgemv", "library"); + subst("cublasDgemvBatched", "hipblasDgemvBatched", "library"); + subst("cublasDgemvStridedBatched", "hipblasDgemvStridedBatched", "library"); + subst("cublasDgemv_v2", "hipblasDgemv", "library"); + subst("cublasDgeqrfBatched", "hipblasDgeqrfBatched", "library"); + subst("cublasDger", "hipblasDger", "library"); + subst("cublasDger_v2", "hipblasDger", "library"); + subst("cublasDgetrfBatched", "hipblasDgetrfBatched", "library"); + subst("cublasDgetriBatched", "hipblasDgetriBatched", "library"); + subst("cublasDgetrsBatched", "hipblasDgetrsBatched", "library"); + subst("cublasDnrm2", "hipblasDnrm2", "library"); + subst("cublasDnrm2_64", "hipblasDnrm2_64", "library"); + subst("cublasDnrm2_v2", "hipblasDnrm2", "library"); + subst("cublasDnrm2_v2_64", "hipblasDnrm2_64", "library"); + subst("cublasDotEx", "hipblasDotEx_v2", "library"); + subst("cublasDotcEx", "hipblasDotcEx_v2", "library"); + subst("cublasDrot", "hipblasDrot", "library"); + subst("cublasDrot_64", "hipblasDrot_64", "library"); + subst("cublasDrot_v2", "hipblasDrot", "library"); + subst("cublasDrot_v2_64", "hipblasDrot_64", "library"); + subst("cublasDrotg", "hipblasDrotg", "library"); + subst("cublasDrotg_v2", "hipblasDrotg", "library"); + subst("cublasDrotm", "hipblasDrotm", "library"); + subst("cublasDrotm_64", "hipblasDrotm_64", "library"); + subst("cublasDrotm_v2", "hipblasDrotm", "library"); + subst("cublasDrotm_v2_64", "hipblasDrotm_64", "library"); + subst("cublasDrotmg", "hipblasDrotmg", "library"); + subst("cublasDrotmg_v2", 
"hipblasDrotmg", "library"); + subst("cublasDsbmv", "hipblasDsbmv", "library"); + subst("cublasDsbmv_v2", "hipblasDsbmv", "library"); + subst("cublasDscal", "hipblasDscal", "library"); + subst("cublasDscal_64", "hipblasDscal_64", "library"); + subst("cublasDscal_v2", "hipblasDscal", "library"); + subst("cublasDscal_v2_64", "hipblasDscal_64", "library"); + subst("cublasDspmv", "hipblasDspmv", "library"); + subst("cublasDspmv_v2", "hipblasDspmv", "library"); + subst("cublasDspr", "hipblasDspr", "library"); + subst("cublasDspr2", "hipblasDspr2", "library"); + subst("cublasDspr2_v2", "hipblasDspr2", "library"); + subst("cublasDspr_v2", "hipblasDspr", "library"); + subst("cublasDswap", "hipblasDswap", "library"); + subst("cublasDswap_64", "hipblasDswap_64", "library"); + subst("cublasDswap_v2", "hipblasDswap", "library"); + subst("cublasDswap_v2_64", "hipblasDswap_64", "library"); + subst("cublasDsymm", "hipblasDsymm", "library"); + subst("cublasDsymm_v2", "hipblasDsymm", "library"); + subst("cublasDsymv", "hipblasDsymv", "library"); + subst("cublasDsymv_v2", "hipblasDsymv", "library"); + subst("cublasDsyr", "hipblasDsyr", "library"); + subst("cublasDsyr2", "hipblasDsyr2", "library"); + subst("cublasDsyr2_v2", "hipblasDsyr2", "library"); + subst("cublasDsyr2k", "hipblasDsyr2k", "library"); + subst("cublasDsyr2k_v2", "hipblasDsyr2k", "library"); + subst("cublasDsyr_v2", "hipblasDsyr", "library"); + subst("cublasDsyrk", "hipblasDsyrk", "library"); + subst("cublasDsyrk_v2", "hipblasDsyrk", "library"); + subst("cublasDsyrkx", "hipblasDsyrkx", "library"); + subst("cublasDtbmv", "hipblasDtbmv", "library"); + subst("cublasDtbmv_v2", "hipblasDtbmv", "library"); + subst("cublasDtbsv", "hipblasDtbsv", "library"); + subst("cublasDtbsv_v2", "hipblasDtbsv", "library"); + subst("cublasDtpmv", "hipblasDtpmv", "library"); + subst("cublasDtpmv_v2", "hipblasDtpmv", "library"); + subst("cublasDtpsv", "hipblasDtpsv", "library"); + subst("cublasDtpsv_v2", "hipblasDtpsv", "library"); + subst("cublasDtrmm", "hipblasDtrmm", "library"); + subst("cublasDtrmm_v2", "hipblasDtrmm", "library"); + subst("cublasDtrmv", "hipblasDtrmv", "library"); + subst("cublasDtrmv_v2", "hipblasDtrmv", "library"); + subst("cublasDtrsm", "hipblasDtrsm", "library"); + subst("cublasDtrsmBatched", "hipblasDtrsmBatched", "library"); + subst("cublasDtrsm_v2", "hipblasDtrsm", "library"); + subst("cublasDtrsv", "hipblasDtrsv", "library"); + subst("cublasDtrsv_v2", "hipblasDtrsv", "library"); + subst("cublasDzasum", "hipblasDzasum_v2", "library"); + subst("cublasDzasum_64", "hipblasDzasum_v2_64", "library"); + subst("cublasDzasum_v2", "hipblasDzasum_v2", "library"); + subst("cublasDzasum_v2_64", "hipblasDzasum_v2_64", "library"); + subst("cublasDznrm2", "hipblasDznrm2_v2", "library"); + subst("cublasDznrm2_64", "hipblasDznrm2_v2_64", "library"); + subst("cublasDznrm2_v2", "hipblasDznrm2_v2", "library"); + subst("cublasDznrm2_v2_64", "hipblasDznrm2_v2_64", "library"); + subst("cublasGemmBatchedEx", "hipblasGemmBatchedEx_v2", "library"); + subst("cublasGemmEx", "hipblasGemmEx_v2", "library"); + subst("cublasGemmStridedBatchedEx", "hipblasGemmStridedBatchedEx_v2", "library"); + subst("cublasGetAtomicsMode", "hipblasGetAtomicsMode", "library"); + subst("cublasGetMathMode", "hipblasGetMathMode", "library"); + subst("cublasGetMatrix", "hipblasGetMatrix", "library"); + subst("cublasGetMatrixAsync", "hipblasGetMatrixAsync", "library"); + subst("cublasGetPointerMode", "hipblasGetPointerMode", "library"); + subst("cublasGetPointerMode_v2", 
"hipblasGetPointerMode", "library"); + subst("cublasGetStream", "hipblasGetStream", "library"); + subst("cublasGetStream_v2", "hipblasGetStream", "library"); + subst("cublasGetVector", "hipblasGetVector", "library"); + subst("cublasGetVectorAsync", "hipblasGetVectorAsync", "library"); + subst("cublasHgemm", "hipblasHgemm", "library"); + subst("cublasHgemmBatched", "hipblasHgemmBatched", "library"); + subst("cublasHgemmStridedBatched", "hipblasHgemmStridedBatched", "library"); + subst("cublasIcamax", "hipblasIcamax_v2", "library"); + subst("cublasIcamax_64", "hipblasIcamax_v2_64", "library"); + subst("cublasIcamax_v2", "hipblasIcamax_v2", "library"); + subst("cublasIcamax_v2_64", "hipblasIcamax_v2_64", "library"); + subst("cublasIcamin", "hipblasIcamin_v2", "library"); + subst("cublasIcamin_64", "hipblasIcamin_v2_64", "library"); + subst("cublasIcamin_v2", "hipblasIcamin_v2", "library"); + subst("cublasIcamin_v2_64", "hipblasIcamin_v2_64", "library"); + subst("cublasIdamax", "hipblasIdamax", "library"); + subst("cublasIdamax_64", "hipblasIdamax_64", "library"); + subst("cublasIdamax_v2", "hipblasIdamax", "library"); + subst("cublasIdamax_v2_64", "hipblasIdamax_64", "library"); + subst("cublasIdamin", "hipblasIdamin", "library"); + subst("cublasIdamin_64", "hipblasIdamin_64", "library"); + subst("cublasIdamin_v2", "hipblasIdamin", "library"); + subst("cublasIdamin_v2_64", "hipblasIdamin_64", "library"); + subst("cublasIsamax", "hipblasIsamax", "library"); + subst("cublasIsamax_64", "hipblasIsamax_64", "library"); + subst("cublasIsamax_v2", "hipblasIsamax", "library"); + subst("cublasIsamax_v2_64", "hipblasIsamax_64", "library"); + subst("cublasIsamin", "hipblasIsamin", "library"); + subst("cublasIsamin_64", "hipblasIsamin_64", "library"); + subst("cublasIsamin_v2", "hipblasIsamin", "library"); + subst("cublasIsamin_v2_64", "hipblasIsamin_64", "library"); + subst("cublasIzamax", "hipblasIzamax_v2", "library"); + subst("cublasIzamax_64", "hipblasIzamax_v2_64", "library"); + subst("cublasIzamax_v2", "hipblasIzamax_v2", "library"); + subst("cublasIzamax_v2_64", "hipblasIzamax_v2_64", "library"); + subst("cublasIzamin", "hipblasIzamin_v2", "library"); + subst("cublasIzamin_64", "hipblasIzamin_v2_64", "library"); + subst("cublasIzamin_v2", "hipblasIzamin_v2", "library"); + subst("cublasIzamin_v2_64", "hipblasIzamin_v2_64", "library"); + subst("cublasLtCreate", "hipblasLtCreate", "library"); + subst("cublasLtDestroy", "hipblasLtDestroy", "library"); + subst("cublasLtMatmul", "hipblasLtMatmul", "library"); + subst("cublasLtMatmulAlgoGetHeuristic", "hipblasLtMatmulAlgoGetHeuristic", "library"); + subst("cublasLtMatmulDescCreate", "hipblasLtMatmulDescCreate", "library"); + subst("cublasLtMatmulDescDestroy", "hipblasLtMatmulDescDestroy", "library"); + subst("cublasLtMatmulDescGetAttribute", "hipblasLtMatmulDescGetAttribute", "library"); + subst("cublasLtMatmulDescSetAttribute", "hipblasLtMatmulDescSetAttribute", "library"); + subst("cublasLtMatmulPreferenceCreate", "hipblasLtMatmulPreferenceCreate", "library"); + subst("cublasLtMatmulPreferenceDestroy", "hipblasLtMatmulPreferenceDestroy", "library"); + subst("cublasLtMatmulPreferenceGetAttribute", "hipblasLtMatmulPreferenceGetAttribute", "library"); + subst("cublasLtMatmulPreferenceSetAttribute", "hipblasLtMatmulPreferenceSetAttribute", "library"); + subst("cublasLtMatrixLayoutCreate", "hipblasLtMatrixLayoutCreate", "library"); + subst("cublasLtMatrixLayoutDestroy", "hipblasLtMatrixLayoutDestroy", "library"); + subst("cublasLtMatrixLayoutGetAttribute", 
"hipblasLtMatrixLayoutGetAttribute", "library"); + subst("cublasLtMatrixLayoutSetAttribute", "hipblasLtMatrixLayoutSetAttribute", "library"); + subst("cublasLtMatrixTransform", "hipblasLtMatrixTransform", "library"); + subst("cublasLtMatrixTransformDescCreate", "hipblasLtMatrixTransformDescCreate", "library"); + subst("cublasLtMatrixTransformDescDestroy", "hipblasLtMatrixTransformDescDestroy", "library"); + subst("cublasLtMatrixTransformDescGetAttribute", "hipblasLtMatrixTransformDescGetAttribute", "library"); + subst("cublasLtMatrixTransformDescSetAttribute", "hipblasLtMatrixTransformDescSetAttribute", "library"); + subst("cublasNrm2Ex", "hipblasNrm2Ex_v2", "library"); + subst("cublasRotEx", "hipblasRotEx_v2", "library"); + subst("cublasSasum", "hipblasSasum", "library"); + subst("cublasSasum_64", "hipblasSasum_64", "library"); + subst("cublasSasum_v2", "hipblasSasum", "library"); + subst("cublasSasum_v2_64", "hipblasSasum_64", "library"); + subst("cublasSaxpy", "hipblasSaxpy", "library"); + subst("cublasSaxpy_64", "hipblasSaxpy_64", "library"); + subst("cublasSaxpy_v2", "hipblasSaxpy", "library"); + subst("cublasSaxpy_v2_64", "hipblasSaxpy_64", "library"); + subst("cublasScalEx", "hipblasScalEx_v2", "library"); + subst("cublasScasum", "hipblasScasum_v2", "library"); + subst("cublasScasum_64", "hipblasScasum_v2_64", "library"); + subst("cublasScasum_v2", "hipblasScasum_v2", "library"); + subst("cublasScasum_v2_64", "hipblasScasum_v2_64", "library"); + subst("cublasScnrm2", "hipblasScnrm2_v2", "library"); + subst("cublasScnrm2_64", "hipblasScnrm2_v2_64", "library"); + subst("cublasScnrm2_v2", "hipblasScnrm2_v2", "library"); + subst("cublasScnrm2_v2_64", "hipblasScnrm2_v2_64", "library"); + subst("cublasScopy", "hipblasScopy", "library"); + subst("cublasScopy_64", "hipblasScopy_64", "library"); + subst("cublasScopy_v2", "hipblasScopy", "library"); + subst("cublasScopy_v2_64", "hipblasScopy_64", "library"); + subst("cublasSdgmm", "hipblasSdgmm", "library"); + subst("cublasSdot", "hipblasSdot", "library"); + subst("cublasSdot_64", "hipblasSdot_64", "library"); + subst("cublasSdot_v2", "hipblasSdot", "library"); + subst("cublasSdot_v2_64", "hipblasSdot_64", "library"); + subst("cublasSetAtomicsMode", "hipblasSetAtomicsMode", "library"); + subst("cublasSetMathMode", "hipblasSetMathMode", "library"); + subst("cublasSetMatrix", "hipblasSetMatrix", "library"); + subst("cublasSetMatrixAsync", "hipblasSetMatrixAsync", "library"); + subst("cublasSetPointerMode", "hipblasSetPointerMode", "library"); + subst("cublasSetPointerMode_v2", "hipblasSetPointerMode", "library"); + subst("cublasSetStream", "hipblasSetStream", "library"); + subst("cublasSetStream_v2", "hipblasSetStream", "library"); + subst("cublasSetVector", "hipblasSetVector", "library"); + subst("cublasSetVectorAsync", "hipblasSetVectorAsync", "library"); + subst("cublasSgbmv", "hipblasSgbmv", "library"); + subst("cublasSgbmv_v2", "hipblasSgbmv", "library"); + subst("cublasSgeam", "hipblasSgeam", "library"); + subst("cublasSgelsBatched", "hipblasSgelsBatched", "library"); + subst("cublasSgemm", "hipblasSgemm", "library"); + subst("cublasSgemmBatched", "hipblasSgemmBatched", "library"); + subst("cublasSgemmStridedBatched", "hipblasSgemmStridedBatched", "library"); + subst("cublasSgemm_v2", "hipblasSgemm", "library"); + subst("cublasSgemv", "hipblasSgemv", "library"); + subst("cublasSgemvBatched", "hipblasSgemvBatched", "library"); + subst("cublasSgemvStridedBatched", "hipblasSgemvStridedBatched", "library"); + subst("cublasSgemv_v2", 
"hipblasSgemv", "library"); + subst("cublasSgeqrfBatched", "hipblasSgeqrfBatched", "library"); + subst("cublasSger", "hipblasSger", "library"); + subst("cublasSger_v2", "hipblasSger", "library"); + subst("cublasSgetrfBatched", "hipblasSgetrfBatched", "library"); + subst("cublasSgetriBatched", "hipblasSgetriBatched", "library"); + subst("cublasSgetrsBatched", "hipblasSgetrsBatched", "library"); + subst("cublasSnrm2", "hipblasSnrm2", "library"); + subst("cublasSnrm2_64", "hipblasSnrm2_64", "library"); + subst("cublasSnrm2_v2", "hipblasSnrm2", "library"); + subst("cublasSnrm2_v2_64", "hipblasSnrm2_64", "library"); + subst("cublasSrot", "hipblasSrot", "library"); + subst("cublasSrot_64", "hipblasSrot_64", "library"); + subst("cublasSrot_v2", "hipblasSrot", "library"); + subst("cublasSrot_v2_64", "hipblasSrot_64", "library"); + subst("cublasSrotg", "hipblasSrotg", "library"); + subst("cublasSrotg_v2", "hipblasSrotg", "library"); + subst("cublasSrotm", "hipblasSrotm", "library"); + subst("cublasSrotm_64", "hipblasSrotm_64", "library"); + subst("cublasSrotm_v2", "hipblasSrotm", "library"); + subst("cublasSrotm_v2_64", "hipblasSrotm_64", "library"); + subst("cublasSrotmg", "hipblasSrotmg", "library"); + subst("cublasSrotmg_v2", "hipblasSrotmg", "library"); + subst("cublasSsbmv", "hipblasSsbmv", "library"); + subst("cublasSsbmv_v2", "hipblasSsbmv", "library"); + subst("cublasSscal", "hipblasSscal", "library"); + subst("cublasSscal_64", "hipblasSscal_64", "library"); + subst("cublasSscal_v2", "hipblasSscal", "library"); + subst("cublasSscal_v2_64", "hipblasSscal_64", "library"); + subst("cublasSspmv", "hipblasSspmv", "library"); + subst("cublasSspmv_v2", "hipblasSspmv", "library"); + subst("cublasSspr", "hipblasSspr", "library"); + subst("cublasSspr2", "hipblasSspr2", "library"); + subst("cublasSspr2_v2", "hipblasSspr2", "library"); + subst("cublasSspr_v2", "hipblasSspr", "library"); + subst("cublasSswap", "hipblasSswap", "library"); + subst("cublasSswap_64", "hipblasSswap_64", "library"); + subst("cublasSswap_v2", "hipblasSswap", "library"); + subst("cublasSswap_v2_64", "hipblasSswap_64", "library"); + subst("cublasSsymm", "hipblasSsymm", "library"); + subst("cublasSsymm_v2", "hipblasSsymm", "library"); + subst("cublasSsymv", "hipblasSsymv", "library"); + subst("cublasSsymv_v2", "hipblasSsymv", "library"); + subst("cublasSsyr", "hipblasSsyr", "library"); + subst("cublasSsyr2", "hipblasSsyr2", "library"); + subst("cublasSsyr2_v2", "hipblasSsyr2", "library"); + subst("cublasSsyr2k", "hipblasSsyr2k", "library"); + subst("cublasSsyr2k_v2", "hipblasSsyr2k", "library"); + subst("cublasSsyr_v2", "hipblasSsyr", "library"); + subst("cublasSsyrk", "hipblasSsyrk", "library"); + subst("cublasSsyrk_v2", "hipblasSsyrk", "library"); + subst("cublasSsyrkx", "hipblasSsyrkx", "library"); + subst("cublasStbmv", "hipblasStbmv", "library"); + subst("cublasStbmv_v2", "hipblasStbmv", "library"); + subst("cublasStbsv", "hipblasStbsv", "library"); + subst("cublasStbsv_v2", "hipblasStbsv", "library"); + subst("cublasStpmv", "hipblasStpmv", "library"); + subst("cublasStpmv_v2", "hipblasStpmv", "library"); + subst("cublasStpsv", "hipblasStpsv", "library"); + subst("cublasStpsv_v2", "hipblasStpsv", "library"); + subst("cublasStrmm", "hipblasStrmm", "library"); + subst("cublasStrmm_v2", "hipblasStrmm", "library"); + subst("cublasStrmv", "hipblasStrmv", "library"); + subst("cublasStrmv_v2", "hipblasStrmv", "library"); + subst("cublasStrsm", "hipblasStrsm", "library"); + subst("cublasStrsmBatched", "hipblasStrsmBatched", 
"library"); + subst("cublasStrsm_v2", "hipblasStrsm", "library"); + subst("cublasStrsv", "hipblasStrsv", "library"); + subst("cublasStrsv_v2", "hipblasStrsv", "library"); + subst("cublasZaxpy", "hipblasZaxpy_v2", "library"); + subst("cublasZaxpy_64", "hipblasZaxpy_v2_64", "library"); + subst("cublasZaxpy_v2", "hipblasZaxpy_v2", "library"); + subst("cublasZaxpy_v2_64", "hipblasZaxpy_v2_64", "library"); + subst("cublasZcopy", "hipblasZcopy_v2", "library"); + subst("cublasZcopy_64", "hipblasZcopy_v2_64", "library"); + subst("cublasZcopy_v2", "hipblasZcopy_v2", "library"); + subst("cublasZcopy_v2_64", "hipblasZcopy_v2_64", "library"); + subst("cublasZdgmm", "hipblasZdgmm_v2", "library"); + subst("cublasZdotc", "hipblasZdotc_v2", "library"); + subst("cublasZdotc_64", "hipblasZdotc_v2_64", "library"); + subst("cublasZdotc_v2", "hipblasZdotc_v2", "library"); + subst("cublasZdotc_v2_64", "hipblasZdotc_v2_64", "library"); + subst("cublasZdotu", "hipblasZdotu_v2", "library"); + subst("cublasZdotu_64", "hipblasZdotu_v2_64", "library"); + subst("cublasZdotu_v2", "hipblasZdotu_v2", "library"); + subst("cublasZdotu_v2_64", "hipblasZdotu_v2_64", "library"); + subst("cublasZdrot", "hipblasZdrot_v2", "library"); + subst("cublasZdrot_64", "hipblasZdrot_v2_64", "library"); + subst("cublasZdrot_v2", "hipblasZdrot_v2", "library"); + subst("cublasZdrot_v2_64", "hipblasZdrot_v2_64", "library"); + subst("cublasZdscal", "hipblasZdscal_v2", "library"); + subst("cublasZdscal_64", "hipblasZdscal_v2_64", "library"); + subst("cublasZdscal_v2", "hipblasZdscal_v2", "library"); + subst("cublasZdscal_v2_64", "hipblasZdscal_v2_64", "library"); + subst("cublasZgbmv", "hipblasZgbmv_v2", "library"); + subst("cublasZgbmv_v2", "hipblasZgbmv_v2", "library"); + subst("cublasZgeam", "hipblasZgeam_v2", "library"); + subst("cublasZgelsBatched", "hipblasZgelsBatched_v2", "library"); + subst("cublasZgemm", "hipblasZgemm_v2", "library"); + subst("cublasZgemmBatched", "hipblasZgemmBatched_v2", "library"); + subst("cublasZgemmStridedBatched", "hipblasZgemmStridedBatched_v2", "library"); + subst("cublasZgemm_v2", "hipblasZgemm_v2", "library"); + subst("cublasZgemv", "hipblasZgemv_v2", "library"); + subst("cublasZgemvBatched", "hipblasZgemvBatched_v2", "library"); + subst("cublasZgemvStridedBatched", "hipblasZgemvStridedBatched_v2", "library"); + subst("cublasZgemv_v2", "hipblasZgemv_v2", "library"); + subst("cublasZgeqrfBatched", "hipblasZgeqrfBatched_v2", "library"); + subst("cublasZgerc", "hipblasZgerc_v2", "library"); + subst("cublasZgerc_v2", "hipblasZgerc_v2", "library"); + subst("cublasZgeru", "hipblasZgeru_v2", "library"); + subst("cublasZgeru_v2", "hipblasZgeru_v2", "library"); + subst("cublasZgetrfBatched", "hipblasZgetrfBatched_v2", "library"); + subst("cublasZgetriBatched", "hipblasZgetriBatched_v2", "library"); + subst("cublasZgetrsBatched", "hipblasZgetrsBatched_v2", "library"); + subst("cublasZhbmv", "hipblasZhbmv_v2", "library"); + subst("cublasZhbmv_v2", "hipblasZhbmv_v2", "library"); + subst("cublasZhemm", "hipblasZhemm_v2", "library"); + subst("cublasZhemm_v2", "hipblasZhemm_v2", "library"); + subst("cublasZhemv", "hipblasZhemv_v2", "library"); + subst("cublasZhemv_v2", "hipblasZhemv_v2", "library"); + subst("cublasZher", "hipblasZher_v2", "library"); + subst("cublasZher2", "hipblasZher2_v2", "library"); + subst("cublasZher2_v2", "hipblasZher2_v2", "library"); + subst("cublasZher2k", "hipblasZher2k_v2", "library"); + subst("cublasZher2k_v2", "hipblasZher2k_v2", "library"); + subst("cublasZher_v2", "hipblasZher_v2", 
"library"); + subst("cublasZherk", "hipblasZherk_v2", "library"); + subst("cublasZherk_v2", "hipblasZherk_v2", "library"); + subst("cublasZherkx", "hipblasZherkx_v2", "library"); + subst("cublasZhpmv", "hipblasZhpmv_v2", "library"); + subst("cublasZhpmv_v2", "hipblasZhpmv_v2", "library"); + subst("cublasZhpr", "hipblasZhpr_v2", "library"); + subst("cublasZhpr2", "hipblasZhpr2_v2", "library"); + subst("cublasZhpr2_v2", "hipblasZhpr2_v2", "library"); + subst("cublasZhpr_v2", "hipblasZhpr_v2", "library"); + subst("cublasZrot", "hipblasZrot_v2", "library"); + subst("cublasZrot_64", "hipblasZrot_v2_64", "library"); + subst("cublasZrot_v2", "hipblasZrot_v2", "library"); + subst("cublasZrot_v2_64", "hipblasZrot_v2_64", "library"); + subst("cublasZrotg", "hipblasZrotg_v2", "library"); + subst("cublasZrotg_v2", "hipblasZrotg_v2", "library"); + subst("cublasZscal", "hipblasZscal_v2", "library"); + subst("cublasZscal_64", "hipblasZscal_v2_64", "library"); + subst("cublasZscal_v2", "hipblasZscal_v2", "library"); + subst("cublasZscal_v2_64", "hipblasZscal_v2_64", "library"); + subst("cublasZswap", "hipblasZswap_v2", "library"); + subst("cublasZswap_64", "hipblasZswap_v2_64", "library"); + subst("cublasZswap_v2", "hipblasZswap_v2", "library"); + subst("cublasZswap_v2_64", "hipblasZswap_v2_64", "library"); + subst("cublasZsymm", "hipblasZsymm_v2", "library"); + subst("cublasZsymm_v2", "hipblasZsymm_v2", "library"); + subst("cublasZsymv", "hipblasZsymv_v2", "library"); + subst("cublasZsymv_v2", "hipblasZsymv_v2", "library"); + subst("cublasZsyr", "hipblasZsyr_v2", "library"); + subst("cublasZsyr2", "hipblasZsyr2_v2", "library"); + subst("cublasZsyr2_v2", "hipblasZsyr2_v2", "library"); + subst("cublasZsyr2k", "hipblasZsyr2k_v2", "library"); + subst("cublasZsyr2k_v2", "hipblasZsyr2k_v2", "library"); + subst("cublasZsyr_v2", "hipblasZsyr_v2", "library"); + subst("cublasZsyrk", "hipblasZsyrk_v2", "library"); + subst("cublasZsyrk_v2", "hipblasZsyrk_v2", "library"); + subst("cublasZsyrkx", "hipblasZsyrkx_v2", "library"); + subst("cublasZtbmv", "hipblasZtbmv_v2", "library"); + subst("cublasZtbmv_v2", "hipblasZtbmv_v2", "library"); + subst("cublasZtbsv", "hipblasZtbsv_v2", "library"); + subst("cublasZtbsv_v2", "hipblasZtbsv_v2", "library"); + subst("cublasZtpmv", "hipblasZtpmv_v2", "library"); + subst("cublasZtpmv_v2", "hipblasZtpmv_v2", "library"); + subst("cublasZtpsv", "hipblasZtpsv_v2", "library"); + subst("cublasZtpsv_v2", "hipblasZtpsv_v2", "library"); + subst("cublasZtrmm", "hipblasZtrmm_v2", "library"); + subst("cublasZtrmm_v2", "hipblasZtrmm_v2", "library"); + subst("cublasZtrmv", "hipblasZtrmv_v2", "library"); + subst("cublasZtrmv_v2", "hipblasZtrmv_v2", "library"); + subst("cublasZtrsm", "hipblasZtrsm_v2", "library"); + subst("cublasZtrsmBatched", "hipblasZtrsmBatched_v2", "library"); + subst("cublasZtrsm_v2", "hipblasZtrsm_v2", "library"); + subst("cublasZtrsv", "hipblasZtrsv_v2", "library"); + subst("cublasZtrsv_v2", "hipblasZtrsv_v2", "library"); + subst("cuda_stream", "hip_stream", "library"); + subst("cudnnActivationBackward", "hipdnnActivationBackward", "library"); + subst("cudnnActivationForward", "hipdnnActivationForward", "library"); + subst("cudnnAddTensor", "hipdnnAddTensor", "library"); + subst("cudnnBatchNormalizationBackward", "hipdnnBatchNormalizationBackward", "library"); + subst("cudnnBatchNormalizationForwardInference", "hipdnnBatchNormalizationForwardInference", "library"); + subst("cudnnBatchNormalizationForwardTraining", "hipdnnBatchNormalizationForwardTraining", "library"); + 
subst("cudnnConvolutionBackwardBias", "hipdnnConvolutionBackwardBias", "library"); + subst("cudnnConvolutionBackwardData", "hipdnnConvolutionBackwardData", "library"); + subst("cudnnConvolutionBackwardFilter", "hipdnnConvolutionBackwardFilter", "library"); + subst("cudnnConvolutionForward", "hipdnnConvolutionForward", "library"); + subst("cudnnCreate", "hipdnnCreate", "library"); + subst("cudnnCreateActivationDescriptor", "hipdnnCreateActivationDescriptor", "library"); + subst("cudnnCreateConvolutionDescriptor", "hipdnnCreateConvolutionDescriptor", "library"); + subst("cudnnCreateDropoutDescriptor", "hipdnnCreateDropoutDescriptor", "library"); + subst("cudnnCreateFilterDescriptor", "hipdnnCreateFilterDescriptor", "library"); + subst("cudnnCreateLRNDescriptor", "hipdnnCreateLRNDescriptor", "library"); + subst("cudnnCreateOpTensorDescriptor", "hipdnnCreateOpTensorDescriptor", "library"); + subst("cudnnCreatePersistentRNNPlan", "hipdnnCreatePersistentRNNPlan", "library"); + subst("cudnnCreatePoolingDescriptor", "hipdnnCreatePoolingDescriptor", "library"); + subst("cudnnCreateRNNDescriptor", "hipdnnCreateRNNDescriptor", "library"); + subst("cudnnCreateReduceTensorDescriptor", "hipdnnCreateReduceTensorDescriptor", "library"); + subst("cudnnCreateTensorDescriptor", "hipdnnCreateTensorDescriptor", "library"); + subst("cudnnDeriveBNTensorDescriptor", "hipdnnDeriveBNTensorDescriptor", "library"); + subst("cudnnDestroy", "hipdnnDestroy", "library"); + subst("cudnnDestroyActivationDescriptor", "hipdnnDestroyActivationDescriptor", "library"); + subst("cudnnDestroyConvolutionDescriptor", "hipdnnDestroyConvolutionDescriptor", "library"); + subst("cudnnDestroyDropoutDescriptor", "hipdnnDestroyDropoutDescriptor", "library"); + subst("cudnnDestroyFilterDescriptor", "hipdnnDestroyFilterDescriptor", "library"); + subst("cudnnDestroyLRNDescriptor", "hipdnnDestroyLRNDescriptor", "library"); + subst("cudnnDestroyOpTensorDescriptor", "hipdnnDestroyOpTensorDescriptor", "library"); + subst("cudnnDestroyPersistentRNNPlan", "hipdnnDestroyPersistentRNNPlan", "library"); + subst("cudnnDestroyPoolingDescriptor", "hipdnnDestroyPoolingDescriptor", "library"); + subst("cudnnDestroyRNNDescriptor", "hipdnnDestroyRNNDescriptor", "library"); + subst("cudnnDestroyReduceTensorDescriptor", "hipdnnDestroyReduceTensorDescriptor", "library"); + subst("cudnnDestroyTensorDescriptor", "hipdnnDestroyTensorDescriptor", "library"); + subst("cudnnDropoutGetStatesSize", "hipdnnDropoutGetStatesSize", "library"); + subst("cudnnFindConvolutionBackwardDataAlgorithm", "hipdnnFindConvolutionBackwardDataAlgorithm", "library"); + subst("cudnnFindConvolutionBackwardDataAlgorithmEx", "hipdnnFindConvolutionBackwardDataAlgorithmEx", "library"); + subst("cudnnFindConvolutionBackwardFilterAlgorithm", "hipdnnFindConvolutionBackwardFilterAlgorithm", "library"); + subst("cudnnFindConvolutionBackwardFilterAlgorithmEx", "hipdnnFindConvolutionBackwardFilterAlgorithmEx", "library"); + subst("cudnnFindConvolutionForwardAlgorithm", "hipdnnFindConvolutionForwardAlgorithm", "library"); + subst("cudnnFindConvolutionForwardAlgorithmEx", "hipdnnFindConvolutionForwardAlgorithmEx", "library"); + subst("cudnnGetActivationDescriptor", "hipdnnGetActivationDescriptor", "library"); + subst("cudnnGetConvolution2dDescriptor", "hipdnnGetConvolution2dDescriptor", "library"); + subst("cudnnGetConvolution2dForwardOutputDim", "hipdnnGetConvolution2dForwardOutputDim", "library"); + subst("cudnnGetConvolutionBackwardDataAlgorithm", "hipdnnGetConvolutionBackwardDataAlgorithm", 
"library"); + subst("cudnnGetConvolutionBackwardDataWorkspaceSize", "hipdnnGetConvolutionBackwardDataWorkspaceSize", "library"); + subst("cudnnGetConvolutionBackwardFilterAlgorithm", "hipdnnGetConvolutionBackwardFilterAlgorithm", "library"); + subst("cudnnGetConvolutionBackwardFilterWorkspaceSize", "hipdnnGetConvolutionBackwardFilterWorkspaceSize", "library"); + subst("cudnnGetConvolutionForwardAlgorithm", "hipdnnGetConvolutionForwardAlgorithm", "library"); + subst("cudnnGetConvolutionForwardWorkspaceSize", "hipdnnGetConvolutionForwardWorkspaceSize", "library"); + subst("cudnnGetErrorString", "hipdnnGetErrorString", "library"); + subst("cudnnGetFilter4dDescriptor", "hipdnnGetFilter4dDescriptor", "library"); + subst("cudnnGetFilterNdDescriptor", "hipdnnGetFilterNdDescriptor", "library"); + subst("cudnnGetLRNDescriptor", "hipdnnGetLRNDescriptor", "library"); + subst("cudnnGetOpTensorDescriptor", "hipdnnGetOpTensorDescriptor", "library"); + subst("cudnnGetPooling2dDescriptor", "hipdnnGetPooling2dDescriptor", "library"); + subst("cudnnGetPooling2dForwardOutputDim", "hipdnnGetPooling2dForwardOutputDim", "library"); + subst("cudnnGetRNNDescriptor", "hipdnnGetRNNDescriptor", "library"); + subst("cudnnGetRNNLinLayerBiasParams", "hipdnnGetRNNLinLayerBiasParams", "library"); + subst("cudnnGetRNNLinLayerMatrixParams", "hipdnnGetRNNLinLayerMatrixParams", "library"); + subst("cudnnGetRNNParamsSize", "hipdnnGetRNNParamsSize", "library"); + subst("cudnnGetRNNTrainingReserveSize", "hipdnnGetRNNTrainingReserveSize", "library"); + subst("cudnnGetRNNWorkspaceSize", "hipdnnGetRNNWorkspaceSize", "library"); + subst("cudnnGetReduceTensorDescriptor", "hipdnnGetReduceTensorDescriptor", "library"); + subst("cudnnGetReductionWorkspaceSize", "hipdnnGetReductionWorkspaceSize", "library"); + subst("cudnnGetStream", "hipdnnGetStream", "library"); + subst("cudnnGetTensor4dDescriptor", "hipdnnGetTensor4dDescriptor", "library"); + subst("cudnnGetTensorNdDescriptor", "hipdnnGetTensorNdDescriptor", "library"); + subst("cudnnGetVersion", "hipdnnGetVersion", "library"); + subst("cudnnLRNCrossChannelBackward", "hipdnnLRNCrossChannelBackward", "library"); + subst("cudnnLRNCrossChannelForward", "hipdnnLRNCrossChannelForward", "library"); + subst("cudnnOpTensor", "hipdnnOpTensor", "library"); + subst("cudnnPoolingBackward", "hipdnnPoolingBackward", "library"); + subst("cudnnPoolingForward", "hipdnnPoolingForward", "library"); + subst("cudnnRNNBackwardData", "hipdnnRNNBackwardData", "library"); + subst("cudnnRNNBackwardWeights", "hipdnnRNNBackwardWeights", "library"); + subst("cudnnRNNForwardInference", "hipdnnRNNForwardInference", "library"); + subst("cudnnRNNForwardTraining", "hipdnnRNNForwardTraining", "library"); + subst("cudnnReduceTensor", "hipdnnReduceTensor", "library"); + subst("cudnnScaleTensor", "hipdnnScaleTensor", "library"); + subst("cudnnSetActivationDescriptor", "hipdnnSetActivationDescriptor", "library"); + subst("cudnnSetConvolution2dDescriptor", "hipdnnSetConvolution2dDescriptor", "library"); + subst("cudnnSetConvolutionGroupCount", "hipdnnSetConvolutionGroupCount", "library"); + subst("cudnnSetConvolutionMathType", "hipdnnSetConvolutionMathType", "library"); + subst("cudnnSetConvolutionNdDescriptor", "hipdnnSetConvolutionNdDescriptor", "library"); + subst("cudnnSetDropoutDescriptor", "hipdnnSetDropoutDescriptor", "library"); + subst("cudnnSetFilter4dDescriptor", "hipdnnSetFilter4dDescriptor", "library"); + subst("cudnnSetFilterNdDescriptor", "hipdnnSetFilterNdDescriptor", "library"); + 
subst("cudnnSetLRNDescriptor", "hipdnnSetLRNDescriptor", "library"); + subst("cudnnSetOpTensorDescriptor", "hipdnnSetOpTensorDescriptor", "library"); + subst("cudnnSetPersistentRNNPlan", "hipdnnSetPersistentRNNPlan", "library"); + subst("cudnnSetPooling2dDescriptor", "hipdnnSetPooling2dDescriptor", "library"); + subst("cudnnSetPoolingNdDescriptor", "hipdnnSetPoolingNdDescriptor", "library"); + subst("cudnnSetRNNDescriptor", "hipdnnSetRNNDescriptor", "library"); + subst("cudnnSetRNNDescriptor_v5", "hipdnnSetRNNDescriptor_v5", "library"); + subst("cudnnSetRNNDescriptor_v6", "hipdnnSetRNNDescriptor_v6", "library"); + subst("cudnnSetReduceTensorDescriptor", "hipdnnSetReduceTensorDescriptor", "library"); + subst("cudnnSetStream", "hipdnnSetStream", "library"); + subst("cudnnSetTensor", "hipdnnSetTensor", "library"); + subst("cudnnSetTensor4dDescriptor", "hipdnnSetTensor4dDescriptor", "library"); + subst("cudnnSetTensor4dDescriptorEx", "hipdnnSetTensor4dDescriptorEx", "library"); + subst("cudnnSetTensorNdDescriptor", "hipdnnSetTensorNdDescriptor", "library"); + subst("cudnnSoftmaxBackward", "hipdnnSoftmaxBackward", "library"); + subst("cudnnSoftmaxForward", "hipdnnSoftmaxForward", "library"); + subst("cufftCallbackLoadC", "hipfftCallbackLoadC", "library"); + subst("cufftCallbackLoadD", "hipfftCallbackLoadD", "library"); + subst("cufftCallbackLoadR", "hipfftCallbackLoadR", "library"); + subst("cufftCallbackLoadZ", "hipfftCallbackLoadZ", "library"); + subst("cufftCallbackStoreC", "hipfftCallbackStoreC", "library"); + subst("cufftCallbackStoreD", "hipfftCallbackStoreD", "library"); + subst("cufftCallbackStoreR", "hipfftCallbackStoreR", "library"); + subst("cufftCallbackStoreZ", "hipfftCallbackStoreZ", "library"); + subst("cufftCreate", "hipfftCreate", "library"); + subst("cufftDestroy", "hipfftDestroy", "library"); + subst("cufftEstimate1d", "hipfftEstimate1d", "library"); + subst("cufftEstimate2d", "hipfftEstimate2d", "library"); + subst("cufftEstimate3d", "hipfftEstimate3d", "library"); + subst("cufftEstimateMany", "hipfftEstimateMany", "library"); + subst("cufftExecC2C", "hipfftExecC2C", "library"); + subst("cufftExecC2R", "hipfftExecC2R", "library"); + subst("cufftExecD2Z", "hipfftExecD2Z", "library"); + subst("cufftExecR2C", "hipfftExecR2C", "library"); + subst("cufftExecZ2D", "hipfftExecZ2D", "library"); + subst("cufftExecZ2Z", "hipfftExecZ2Z", "library"); + subst("cufftGetProperty", "hipfftGetProperty", "library"); + subst("cufftGetSize", "hipfftGetSize", "library"); + subst("cufftGetSize1d", "hipfftGetSize1d", "library"); + subst("cufftGetSize2d", "hipfftGetSize2d", "library"); + subst("cufftGetSize3d", "hipfftGetSize3d", "library"); + subst("cufftGetSizeMany", "hipfftGetSizeMany", "library"); + subst("cufftGetSizeMany64", "hipfftGetSizeMany64", "library"); + subst("cufftGetVersion", "hipfftGetVersion", "library"); + subst("cufftMakePlan1d", "hipfftMakePlan1d", "library"); + subst("cufftMakePlan2d", "hipfftMakePlan2d", "library"); + subst("cufftMakePlan3d", "hipfftMakePlan3d", "library"); + subst("cufftMakePlanMany", "hipfftMakePlanMany", "library"); + subst("cufftMakePlanMany64", "hipfftMakePlanMany64", "library"); + subst("cufftPlan1d", "hipfftPlan1d", "library"); + subst("cufftPlan2d", "hipfftPlan2d", "library"); + subst("cufftPlan3d", "hipfftPlan3d", "library"); + subst("cufftPlanMany", "hipfftPlanMany", "library"); + subst("cufftSetAutoAllocation", "hipfftSetAutoAllocation", "library"); + subst("cufftSetStream", "hipfftSetStream", "library"); + subst("cufftSetWorkArea", 
"hipfftSetWorkArea", "library"); + subst("cufftXtClearCallback", "hipfftXtClearCallback", "library"); + subst("cufftXtSetCallback", "hipfftXtSetCallback", "library"); + subst("cufftXtSetCallbackSharedSize", "hipfftXtSetCallbackSharedSize", "library"); + subst("curandCreateGenerator", "hiprandCreateGenerator", "library"); + subst("curandCreateGeneratorHost", "hiprandCreateGeneratorHost", "library"); + subst("curandCreatePoissonDistribution", "hiprandCreatePoissonDistribution", "library"); + subst("curandDestroyDistribution", "hiprandDestroyDistribution", "library"); + subst("curandDestroyGenerator", "hiprandDestroyGenerator", "library"); + subst("curandGenerate", "hiprandGenerate", "library"); + subst("curandGenerateLogNormal", "hiprandGenerateLogNormal", "library"); + subst("curandGenerateLogNormalDouble", "hiprandGenerateLogNormalDouble", "library"); + subst("curandGenerateNormal", "hiprandGenerateNormal", "library"); + subst("curandGenerateNormalDouble", "hiprandGenerateNormalDouble", "library"); + subst("curandGeneratePoisson", "hiprandGeneratePoisson", "library"); + subst("curandGenerateSeeds", "hiprandGenerateSeeds", "library"); + subst("curandGenerateUniform", "hiprandGenerateUniform", "library"); + subst("curandGenerateUniformDouble", "hiprandGenerateUniformDouble", "library"); + subst("curandGetDirectionVectors32", "hiprandGetDirectionVectors32", "library"); + subst("curandGetDirectionVectors64", "hiprandGetDirectionVectors64", "library"); + subst("curandGetScrambleConstants32", "hiprandGetScrambleConstants32", "library"); + subst("curandGetScrambleConstants64", "hiprandGetScrambleConstants64", "library"); + subst("curandGetVersion", "hiprandGetVersion", "library"); + subst("curandMakeMTGP32Constants", "hiprandMakeMTGP32Constants", "library"); + subst("curandMakeMTGP32KernelState", "hiprandMakeMTGP32KernelState", "library"); + subst("curandSetGeneratorOffset", "hiprandSetGeneratorOffset", "library"); + subst("curandSetPseudoRandomGeneratorSeed", "hiprandSetPseudoRandomGeneratorSeed", "library"); + subst("curandSetQuasiRandomGeneratorDimensions", "hiprandSetQuasiRandomGeneratorDimensions", "library"); + subst("curandSetStream", "hiprandSetStream", "library"); + subst("cusolverDnCCgels", "hipsolverDnCCgels", "library"); + subst("cusolverDnCCgels_bufferSize", "hipsolverDnCCgels_bufferSize", "library"); + subst("cusolverDnCCgesv", "hipsolverDnCCgesv", "library"); + subst("cusolverDnCCgesv_bufferSize", "hipsolverDnCCgesv_bufferSize", "library"); + subst("cusolverDnCgebrd", "hipsolverDnCgebrd", "library"); + subst("cusolverDnCgebrd_bufferSize", "hipsolverDnCgebrd_bufferSize", "library"); + subst("cusolverDnCgeqrf", "hipsolverDnCgeqrf", "library"); + subst("cusolverDnCgeqrf_bufferSize", "hipsolverDnCgeqrf_bufferSize", "library"); + subst("cusolverDnCgesvd", "hipsolverDnCgesvd", "library"); + subst("cusolverDnCgesvd_bufferSize", "hipsolverDnCgesvd_bufferSize", "library"); + subst("cusolverDnCgesvdaStridedBatched", "hipsolverDnCgesvdaStridedBatched", "library"); + subst("cusolverDnCgesvdaStridedBatched_bufferSize", "hipsolverDnCgesvdaStridedBatched_bufferSize", "library"); + subst("cusolverDnCgesvdj", "hipsolverDnCgesvdj", "library"); + subst("cusolverDnCgesvdjBatched", "hipsolverDnCgesvdjBatched", "library"); + subst("cusolverDnCgesvdjBatched_bufferSize", "hipsolverDnCgesvdjBatched_bufferSize", "library"); + subst("cusolverDnCgesvdj_bufferSize", "hipsolverDnCgesvdj_bufferSize", "library"); + subst("cusolverDnCgetrf", "hipsolverDnCgetrf", "library"); + subst("cusolverDnCgetrf_bufferSize", 
"hipsolverDnCgetrf_bufferSize", "library"); + subst("cusolverDnCgetrs", "hipsolverDnCgetrs", "library"); + subst("cusolverDnCheevd", "hipsolverDnCheevd", "library"); + subst("cusolverDnCheevd_bufferSize", "hipsolverDnCheevd_bufferSize", "library"); + subst("cusolverDnCheevdx", "hipsolverDnCheevdx", "library"); + subst("cusolverDnCheevdx_bufferSize", "hipsolverDnCheevdx_bufferSize", "library"); + subst("cusolverDnCheevj", "hipsolverDnCheevj", "library"); + subst("cusolverDnCheevjBatched", "hipsolverDnCheevjBatched", "library"); + subst("cusolverDnCheevjBatched_bufferSize", "hipsolverDnCheevjBatched_bufferSize", "library"); + subst("cusolverDnCheevj_bufferSize", "hipsolverDnCheevj_bufferSize", "library"); + subst("cusolverDnChegvd", "hipsolverDnChegvd", "library"); + subst("cusolverDnChegvd_bufferSize", "hipsolverDnChegvd_bufferSize", "library"); + subst("cusolverDnChegvdx", "hipsolverDnChegvdx", "library"); + subst("cusolverDnChegvdx_bufferSize", "hipsolverDnChegvdx_bufferSize", "library"); + subst("cusolverDnChegvj", "hipsolverDnChegvj", "library"); + subst("cusolverDnChegvj_bufferSize", "hipsolverDnChegvj_bufferSize", "library"); + subst("cusolverDnChetrd", "hipsolverDnChetrd", "library"); + subst("cusolverDnChetrd_bufferSize", "hipsolverDnChetrd_bufferSize", "library"); + subst("cusolverDnCpotrf", "hipsolverDnCpotrf", "library"); + subst("cusolverDnCpotrfBatched", "hipsolverDnCpotrfBatched", "library"); + subst("cusolverDnCpotrf_bufferSize", "hipsolverDnCpotrf_bufferSize", "library"); + subst("cusolverDnCpotri", "hipsolverDnCpotri", "library"); + subst("cusolverDnCpotri_bufferSize", "hipsolverDnCpotri_bufferSize", "library"); + subst("cusolverDnCpotrs", "hipsolverDnCpotrs", "library"); + subst("cusolverDnCpotrsBatched", "hipsolverDnCpotrsBatched", "library"); + subst("cusolverDnCreate", "hipsolverDnCreate", "library"); + subst("cusolverDnCreateGesvdjInfo", "hipsolverDnCreateGesvdjInfo", "library"); + subst("cusolverDnCreateSyevjInfo", "hipsolverDnCreateSyevjInfo", "library"); + subst("cusolverDnCsytrf", "hipsolverDnCsytrf", "library"); + subst("cusolverDnCsytrf_bufferSize", "hipsolverDnCsytrf_bufferSize", "library"); + subst("cusolverDnCungbr", "hipsolverDnCungbr", "library"); + subst("cusolverDnCungbr_bufferSize", "hipsolverDnCungbr_bufferSize", "library"); + subst("cusolverDnCungqr", "hipsolverDnCungqr", "library"); + subst("cusolverDnCungqr_bufferSize", "hipsolverDnCungqr_bufferSize", "library"); + subst("cusolverDnCungtr", "hipsolverDnCungtr", "library"); + subst("cusolverDnCungtr_bufferSize", "hipsolverDnCungtr_bufferSize", "library"); + subst("cusolverDnCunmqr", "hipsolverDnCunmqr", "library"); + subst("cusolverDnCunmqr_bufferSize", "hipsolverDnCunmqr_bufferSize", "library"); + subst("cusolverDnCunmtr", "hipsolverDnCunmtr", "library"); + subst("cusolverDnCunmtr_bufferSize", "hipsolverDnCunmtr_bufferSize", "library"); + subst("cusolverDnDDgels", "hipsolverDnDDgels", "library"); + subst("cusolverDnDDgels_bufferSize", "hipsolverDnDDgels_bufferSize", "library"); + subst("cusolverDnDDgesv", "hipsolverDnDDgesv", "library"); + subst("cusolverDnDDgesv_bufferSize", "hipsolverDnDDgesv_bufferSize", "library"); + subst("cusolverDnDestroy", "hipsolverDnDestroy", "library"); + subst("cusolverDnDestroyGesvdjInfo", "hipsolverDnDestroyGesvdjInfo", "library"); + subst("cusolverDnDestroySyevjInfo", "hipsolverDnDestroySyevjInfo", "library"); + subst("cusolverDnDgebrd", "hipsolverDnDgebrd", "library"); + subst("cusolverDnDgebrd_bufferSize", "hipsolverDnDgebrd_bufferSize", "library"); + 
subst("cusolverDnDgeqrf", "hipsolverDnDgeqrf", "library"); + subst("cusolverDnDgeqrf_bufferSize", "hipsolverDnDgeqrf_bufferSize", "library"); + subst("cusolverDnDgesvd", "hipsolverDnDgesvd", "library"); + subst("cusolverDnDgesvd_bufferSize", "hipsolverDnDgesvd_bufferSize", "library"); + subst("cusolverDnDgesvdaStridedBatched", "hipsolverDnDgesvdaStridedBatched", "library"); + subst("cusolverDnDgesvdaStridedBatched_bufferSize", "hipsolverDnDgesvdaStridedBatched_bufferSize", "library"); + subst("cusolverDnDgesvdj", "hipsolverDnDgesvdj", "library"); + subst("cusolverDnDgesvdjBatched", "hipsolverDnDgesvdjBatched", "library"); + subst("cusolverDnDgesvdjBatched_bufferSize", "hipsolverDnDgesvdjBatched_bufferSize", "library"); + subst("cusolverDnDgesvdj_bufferSize", "hipsolverDnDgesvdj_bufferSize", "library"); + subst("cusolverDnDgetrf", "hipsolverDnDgetrf", "library"); + subst("cusolverDnDgetrf_bufferSize", "hipsolverDnDgetrf_bufferSize", "library"); + subst("cusolverDnDgetrs", "hipsolverDnDgetrs", "library"); + subst("cusolverDnDorgbr", "hipsolverDnDorgbr", "library"); + subst("cusolverDnDorgbr_bufferSize", "hipsolverDnDorgbr_bufferSize", "library"); + subst("cusolverDnDorgqr", "hipsolverDnDorgqr", "library"); + subst("cusolverDnDorgqr_bufferSize", "hipsolverDnDorgqr_bufferSize", "library"); + subst("cusolverDnDorgtr", "hipsolverDnDorgtr", "library"); + subst("cusolverDnDorgtr_bufferSize", "hipsolverDnDorgtr_bufferSize", "library"); + subst("cusolverDnDormqr", "hipsolverDnDormqr", "library"); + subst("cusolverDnDormqr_bufferSize", "hipsolverDnDormqr_bufferSize", "library"); + subst("cusolverDnDormtr", "hipsolverDnDormtr", "library"); + subst("cusolverDnDormtr_bufferSize", "hipsolverDnDormtr_bufferSize", "library"); + subst("cusolverDnDpotrf", "hipsolverDnDpotrf", "library"); + subst("cusolverDnDpotrfBatched", "hipsolverDnDpotrfBatched", "library"); + subst("cusolverDnDpotrf_bufferSize", "hipsolverDnDpotrf_bufferSize", "library"); + subst("cusolverDnDpotri", "hipsolverDnDpotri", "library"); + subst("cusolverDnDpotri_bufferSize", "hipsolverDnDpotri_bufferSize", "library"); + subst("cusolverDnDpotrs", "hipsolverDnDpotrs", "library"); + subst("cusolverDnDpotrsBatched", "hipsolverDnDpotrsBatched", "library"); + subst("cusolverDnDsyevd", "hipsolverDnDsyevd", "library"); + subst("cusolverDnDsyevd_bufferSize", "hipsolverDnDsyevd_bufferSize", "library"); + subst("cusolverDnDsyevdx", "hipsolverDnDsyevdx", "library"); + subst("cusolverDnDsyevdx_bufferSize", "hipsolverDnDsyevdx_bufferSize", "library"); + subst("cusolverDnDsyevj", "hipsolverDnDsyevj", "library"); + subst("cusolverDnDsyevjBatched", "hipsolverDnDsyevjBatched", "library"); + subst("cusolverDnDsyevjBatched_bufferSize", "hipsolverDnDsyevjBatched_bufferSize", "library"); + subst("cusolverDnDsyevj_bufferSize", "hipsolverDnDsyevj_bufferSize", "library"); + subst("cusolverDnDsygvd", "hipsolverDnDsygvd", "library"); + subst("cusolverDnDsygvd_bufferSize", "hipsolverDnDsygvd_bufferSize", "library"); + subst("cusolverDnDsygvdx", "hipsolverDnDsygvdx", "library"); + subst("cusolverDnDsygvdx_bufferSize", "hipsolverDnDsygvdx_bufferSize", "library"); + subst("cusolverDnDsygvj", "hipsolverDnDsygvj", "library"); + subst("cusolverDnDsygvj_bufferSize", "hipsolverDnDsygvj_bufferSize", "library"); + subst("cusolverDnDsytrd", "hipsolverDnDsytrd", "library"); + subst("cusolverDnDsytrd_bufferSize", "hipsolverDnDsytrd_bufferSize", "library"); + subst("cusolverDnDsytrf", "hipsolverDnDsytrf", "library"); + subst("cusolverDnDsytrf_bufferSize", 
"hipsolverDnDsytrf_bufferSize", "library"); + subst("cusolverDnGetStream", "hipsolverGetStream", "library"); + subst("cusolverDnSSgels", "hipsolverDnSSgels", "library"); + subst("cusolverDnSSgels_bufferSize", "hipsolverDnSSgels_bufferSize", "library"); + subst("cusolverDnSSgesv", "hipsolverDnSSgesv", "library"); + subst("cusolverDnSSgesv_bufferSize", "hipsolverDnSSgesv_bufferSize", "library"); + subst("cusolverDnSetStream", "hipsolverSetStream", "library"); + subst("cusolverDnSgebrd", "hipsolverDnSgebrd", "library"); + subst("cusolverDnSgebrd_bufferSize", "hipsolverDnSgebrd_bufferSize", "library"); + subst("cusolverDnSgeqrf", "hipsolverDnSgeqrf", "library"); + subst("cusolverDnSgeqrf_bufferSize", "hipsolverDnSgeqrf_bufferSize", "library"); + subst("cusolverDnSgesvd", "hipsolverDnSgesvd", "library"); + subst("cusolverDnSgesvd_bufferSize", "hipsolverDnSgesvd_bufferSize", "library"); + subst("cusolverDnSgesvdaStridedBatched", "hipsolverDnSgesvdaStridedBatched", "library"); + subst("cusolverDnSgesvdaStridedBatched_bufferSize", "hipsolverDnSgesvdaStridedBatched_bufferSize", "library"); + subst("cusolverDnSgesvdj", "hipsolverDnSgesvdj", "library"); + subst("cusolverDnSgesvdjBatched", "hipsolverDnSgesvdjBatched", "library"); + subst("cusolverDnSgesvdjBatched_bufferSize", "hipsolverDnSgesvdjBatched_bufferSize", "library"); + subst("cusolverDnSgesvdj_bufferSize", "hipsolverDnSgesvdj_bufferSize", "library"); + subst("cusolverDnSgetrf", "hipsolverDnSgetrf", "library"); + subst("cusolverDnSgetrf_bufferSize", "hipsolverDnSgetrf_bufferSize", "library"); + subst("cusolverDnSgetrs", "hipsolverDnSgetrs", "library"); + subst("cusolverDnSorgbr", "hipsolverDnSorgbr", "library"); + subst("cusolverDnSorgbr_bufferSize", "hipsolverDnSorgbr_bufferSize", "library"); + subst("cusolverDnSorgqr", "hipsolverDnSorgqr", "library"); + subst("cusolverDnSorgqr_bufferSize", "hipsolverDnSorgqr_bufferSize", "library"); + subst("cusolverDnSorgtr", "hipsolverDnSorgtr", "library"); + subst("cusolverDnSorgtr_bufferSize", "hipsolverDnSorgtr_bufferSize", "library"); + subst("cusolverDnSormqr", "hipsolverDnSormqr", "library"); + subst("cusolverDnSormqr_bufferSize", "hipsolverDnSormqr_bufferSize", "library"); + subst("cusolverDnSormtr", "hipsolverDnSormtr", "library"); + subst("cusolverDnSormtr_bufferSize", "hipsolverDnSormtr_bufferSize", "library"); + subst("cusolverDnSpotrf", "hipsolverDnSpotrf", "library"); + subst("cusolverDnSpotrfBatched", "hipsolverDnSpotrfBatched", "library"); + subst("cusolverDnSpotrf_bufferSize", "hipsolverDnSpotrf_bufferSize", "library"); + subst("cusolverDnSpotri", "hipsolverDnSpotri", "library"); + subst("cusolverDnSpotri_bufferSize", "hipsolverDnSpotri_bufferSize", "library"); + subst("cusolverDnSpotrs", "hipsolverDnSpotrs", "library"); + subst("cusolverDnSpotrsBatched", "hipsolverDnSpotrsBatched", "library"); + subst("cusolverDnSsyevd", "hipsolverDnSsyevd", "library"); + subst("cusolverDnSsyevd_bufferSize", "hipsolverDnSsyevd_bufferSize", "library"); + subst("cusolverDnSsyevdx", "hipsolverDnSsyevdx", "library"); + subst("cusolverDnSsyevdx_bufferSize", "hipsolverDnSsyevdx_bufferSize", "library"); + subst("cusolverDnSsyevj", "hipsolverDnSsyevj", "library"); + subst("cusolverDnSsyevjBatched", "hipsolverDnSsyevjBatched", "library"); + subst("cusolverDnSsyevjBatched_bufferSize", "hipsolverDnSsyevjBatched_bufferSize", "library"); + subst("cusolverDnSsyevj_bufferSize", "hipsolverDnSsyevj_bufferSize", "library"); + subst("cusolverDnSsygvd", "hipsolverDnSsygvd", "library"); + subst("cusolverDnSsygvd_bufferSize", 
"hipsolverDnSsygvd_bufferSize", "library"); + subst("cusolverDnSsygvdx", "hipsolverDnSsygvdx", "library"); + subst("cusolverDnSsygvdx_bufferSize", "hipsolverDnSsygvdx_bufferSize", "library"); + subst("cusolverDnSsygvj", "hipsolverDnSsygvj", "library"); + subst("cusolverDnSsygvj_bufferSize", "hipsolverDnSsygvj_bufferSize", "library"); + subst("cusolverDnSsytrd", "hipsolverDnSsytrd", "library"); + subst("cusolverDnSsytrd_bufferSize", "hipsolverDnSsytrd_bufferSize", "library"); + subst("cusolverDnSsytrf", "hipsolverDnSsytrf", "library"); + subst("cusolverDnSsytrf_bufferSize", "hipsolverDnSsytrf_bufferSize", "library"); + subst("cusolverDnXgesvdjGetResidual", "hipsolverDnXgesvdjGetResidual", "library"); + subst("cusolverDnXgesvdjGetSweeps", "hipsolverDnXgesvdjGetSweeps", "library"); + subst("cusolverDnXgesvdjSetMaxSweeps", "hipsolverDnXgesvdjSetMaxSweeps", "library"); + subst("cusolverDnXgesvdjSetSortEig", "hipsolverDnXgesvdjSetSortEig", "library"); + subst("cusolverDnXgesvdjSetTolerance", "hipsolverDnXgesvdjSetTolerance", "library"); + subst("cusolverDnXsyevjGetResidual", "hipsolverDnXsyevjGetResidual", "library"); + subst("cusolverDnXsyevjGetSweeps", "hipsolverDnXsyevjGetSweeps", "library"); + subst("cusolverDnXsyevjSetMaxSweeps", "hipsolverDnXsyevjSetMaxSweeps", "library"); + subst("cusolverDnXsyevjSetSortEig", "hipsolverDnXsyevjSetSortEig", "library"); + subst("cusolverDnXsyevjSetTolerance", "hipsolverDnXsyevjSetTolerance", "library"); + subst("cusolverDnZZgels", "hipsolverDnZZgels", "library"); + subst("cusolverDnZZgels_bufferSize", "hipsolverDnZZgels_bufferSize", "library"); + subst("cusolverDnZZgesv", "hipsolverDnZZgesv", "library"); + subst("cusolverDnZZgesv_bufferSize", "hipsolverDnZZgesv_bufferSize", "library"); + subst("cusolverDnZgebrd", "hipsolverDnZgebrd", "library"); + subst("cusolverDnZgebrd_bufferSize", "hipsolverDnZgebrd_bufferSize", "library"); + subst("cusolverDnZgeqrf", "hipsolverDnZgeqrf", "library"); + subst("cusolverDnZgeqrf_bufferSize", "hipsolverDnZgeqrf_bufferSize", "library"); + subst("cusolverDnZgesvd", "hipsolverDnZgesvd", "library"); + subst("cusolverDnZgesvd_bufferSize", "hipsolverDnZgesvd_bufferSize", "library"); + subst("cusolverDnZgesvdaStridedBatched", "hipsolverDnZgesvdaStridedBatched", "library"); + subst("cusolverDnZgesvdaStridedBatched_bufferSize", "hipsolverDnZgesvdaStridedBatched_bufferSize", "library"); + subst("cusolverDnZgesvdj", "hipsolverDnZgesvdj", "library"); + subst("cusolverDnZgesvdjBatched", "hipsolverDnZgesvdjBatched", "library"); + subst("cusolverDnZgesvdjBatched_bufferSize", "hipsolverDnZgesvdjBatched_bufferSize", "library"); + subst("cusolverDnZgesvdj_bufferSize", "hipsolverDnZgesvdj_bufferSize", "library"); + subst("cusolverDnZgetrf", "hipsolverDnZgetrf", "library"); + subst("cusolverDnZgetrf_bufferSize", "hipsolverDnZgetrf_bufferSize", "library"); + subst("cusolverDnZgetrs", "hipsolverDnZgetrs", "library"); + subst("cusolverDnZheevd", "hipsolverDnZheevd", "library"); + subst("cusolverDnZheevd_bufferSize", "hipsolverDnZheevd_bufferSize", "library"); + subst("cusolverDnZheevdx", "hipsolverDnZheevdx", "library"); + subst("cusolverDnZheevdx_bufferSize", "hipsolverDnZheevdx_bufferSize", "library"); + subst("cusolverDnZheevj", "hipsolverDnZheevj", "library"); + subst("cusolverDnZheevjBatched", "hipsolverDnZheevjBatched", "library"); + subst("cusolverDnZheevjBatched_bufferSize", "hipsolverDnZheevjBatched_bufferSize", "library"); + subst("cusolverDnZheevj_bufferSize", "hipsolverDnZheevj_bufferSize", "library"); + subst("cusolverDnZhegvd", 
"hipsolverDnZhegvd", "library"); + subst("cusolverDnZhegvd_bufferSize", "hipsolverDnZhegvd_bufferSize", "library"); + subst("cusolverDnZhegvdx", "hipsolverDnZhegvdx", "library"); + subst("cusolverDnZhegvdx_bufferSize", "hipsolverDnZhegvdx_bufferSize", "library"); + subst("cusolverDnZhegvj", "hipsolverDnZhegvj", "library"); + subst("cusolverDnZhegvj_bufferSize", "hipsolverDnZhegvj_bufferSize", "library"); + subst("cusolverDnZhetrd", "hipsolverDnZhetrd", "library"); + subst("cusolverDnZhetrd_bufferSize", "hipsolverDnZhetrd_bufferSize", "library"); + subst("cusolverDnZpotrf", "hipsolverDnZpotrf", "library"); + subst("cusolverDnZpotrfBatched", "hipsolverDnZpotrfBatched", "library"); + subst("cusolverDnZpotrf_bufferSize", "hipsolverDnZpotrf_bufferSize", "library"); + subst("cusolverDnZpotri", "hipsolverDnZpotri", "library"); + subst("cusolverDnZpotri_bufferSize", "hipsolverDnZpotri_bufferSize", "library"); + subst("cusolverDnZpotrs", "hipsolverDnZpotrs", "library"); + subst("cusolverDnZpotrsBatched", "hipsolverDnZpotrsBatched", "library"); + subst("cusolverDnZsytrf", "hipsolverDnZsytrf", "library"); + subst("cusolverDnZsytrf_bufferSize", "hipsolverDnZsytrf_bufferSize", "library"); + subst("cusolverDnZungbr", "hipsolverDnZungbr", "library"); + subst("cusolverDnZungbr_bufferSize", "hipsolverDnZungbr_bufferSize", "library"); + subst("cusolverDnZungqr", "hipsolverDnZungqr", "library"); + subst("cusolverDnZungqr_bufferSize", "hipsolverDnZungqr_bufferSize", "library"); + subst("cusolverDnZungtr", "hipsolverDnZungtr", "library"); + subst("cusolverDnZungtr_bufferSize", "hipsolverDnZungtr_bufferSize", "library"); + subst("cusolverDnZunmqr", "hipsolverDnZunmqr", "library"); + subst("cusolverDnZunmqr_bufferSize", "hipsolverDnZunmqr_bufferSize", "library"); + subst("cusolverDnZunmtr", "hipsolverDnZunmtr", "library"); + subst("cusolverDnZunmtr_bufferSize", "hipsolverDnZunmtr_bufferSize", "library"); + subst("cusolverRfAccessBundledFactorsDevice", "hipsolverRfAccessBundledFactorsDevice", "library"); + subst("cusolverRfAnalyze", "hipsolverRfAnalyze", "library"); + subst("cusolverRfBatchAnalyze", "hipsolverRfBatchAnalyze", "library"); + subst("cusolverRfBatchRefactor", "hipsolverRfBatchRefactor", "library"); + subst("cusolverRfBatchResetValues", "hipsolverRfBatchResetValues", "library"); + subst("cusolverRfBatchSetupHost", "hipsolverRfBatchSetupHost", "library"); + subst("cusolverRfBatchSolve", "hipsolverRfBatchSolve", "library"); + subst("cusolverRfBatchZeroPivot", "hipsolverRfBatchZeroPivot", "library"); + subst("cusolverRfCreate", "hipsolverRfCreate", "library"); + subst("cusolverRfDestroy", "hipsolverRfDestroy", "library"); + subst("cusolverRfExtractBundledFactorsHost", "hipsolverRfExtractBundledFactorsHost", "library"); + subst("cusolverRfExtractSplitFactorsHost", "hipsolverRfExtractSplitFactorsHost", "library"); + subst("cusolverRfGetMatrixFormat", "hipsolverRfGetMatrixFormat", "library"); + subst("cusolverRfGetNumericBoostReport", "hipsolverRfGetNumericBoostReport", "library"); + subst("cusolverRfGetNumericProperties", "hipsolverRfGetNumericProperties", "library"); + subst("cusolverRfGetResetValuesFastMode", "hipsolverRfGetResetValuesFastMode", "library"); + subst("cusolverRfRefactor", "hipsolverRfRefactor", "library"); + subst("cusolverRfResetValues", "hipsolverRfResetValues", "library"); + subst("cusolverRfSetAlgs", "hipsolverRfSetAlgs", "library"); + subst("cusolverRfSetMatrixFormat", "hipsolverRfSetMatrixFormat", "library"); + subst("cusolverRfSetNumericProperties", 
"hipsolverRfSetNumericProperties", "library"); + subst("cusolverRfSetResetValuesFastMode", "hipsolverRfSetResetValuesFastMode", "library"); + subst("cusolverRfSetupDevice", "hipsolverRfSetupDevice", "library"); + subst("cusolverRfSetupHost", "hipsolverRfSetupHost", "library"); + subst("cusolverRfSolve", "hipsolverRfSolve", "library"); + subst("cusolverSpCreate", "hipsolverSpCreate", "library"); + subst("cusolverSpDcsrlsvchol", "hipsolverSpDcsrlsvchol", "library"); + subst("cusolverSpDcsrlsvcholHost", "hipsolverSpDcsrlsvcholHost", "library"); + subst("cusolverSpDestroy", "hipsolverSpDestroy", "library"); + subst("cusolverSpScsrlsvchol", "hipsolverSpScsrlsvchol", "library"); + subst("cusolverSpScsrlsvcholHost", "hipsolverSpScsrlsvcholHost", "library"); + subst("cusolverSpSetStream", "hipsolverSpSetStream", "library"); + subst("cusparseAxpby", "hipsparseAxpby", "library"); + subst("cusparseBlockedEllGet", "hipsparseBlockedEllGet", "library"); + subst("cusparseCaxpyi", "hipsparseCaxpyi", "library"); + subst("cusparseCbsr2csr", "hipsparseCbsr2csr", "library"); + subst("cusparseCbsric02", "hipsparseCbsric02", "library"); + subst("cusparseCbsric02_analysis", "hipsparseCbsric02_analysis", "library"); + subst("cusparseCbsric02_bufferSize", "hipsparseCbsric02_bufferSize", "library"); + subst("cusparseCbsrilu02", "hipsparseCbsrilu02", "library"); + subst("cusparseCbsrilu02_analysis", "hipsparseCbsrilu02_analysis", "library"); + subst("cusparseCbsrilu02_bufferSize", "hipsparseCbsrilu02_bufferSize", "library"); + subst("cusparseCbsrilu02_numericBoost", "hipsparseCbsrilu02_numericBoost", "library"); + subst("cusparseCbsrmm", "hipsparseCbsrmm", "library"); + subst("cusparseCbsrmv", "hipsparseCbsrmv", "library"); + subst("cusparseCbsrsm2_analysis", "hipsparseCbsrsm2_analysis", "library"); + subst("cusparseCbsrsm2_bufferSize", "hipsparseCbsrsm2_bufferSize", "library"); + subst("cusparseCbsrsm2_solve", "hipsparseCbsrsm2_solve", "library"); + subst("cusparseCbsrsv2_analysis", "hipsparseCbsrsv2_analysis", "library"); + subst("cusparseCbsrsv2_bufferSize", "hipsparseCbsrsv2_bufferSize", "library"); + subst("cusparseCbsrsv2_bufferSizeExt", "hipsparseCbsrsv2_bufferSizeExt", "library"); + subst("cusparseCbsrsv2_solve", "hipsparseCbsrsv2_solve", "library"); + subst("cusparseCbsrxmv", "hipsparseCbsrxmv", "library"); + subst("cusparseCcsc2dense", "hipsparseCcsc2dense", "library"); + subst("cusparseCcsr2bsr", "hipsparseCcsr2bsr", "library"); + subst("cusparseCcsr2csc", "hipsparseCcsr2csc", "library"); + subst("cusparseCcsr2csr_compress", "hipsparseCcsr2csr_compress", "library"); + subst("cusparseCcsr2csru", "hipsparseCcsr2csru", "library"); + subst("cusparseCcsr2dense", "hipsparseCcsr2dense", "library"); + subst("cusparseCcsr2gebsr", "hipsparseCcsr2gebsr", "library"); + subst("cusparseCcsr2gebsr_bufferSize", "hipsparseCcsr2gebsr_bufferSize", "library"); + subst("cusparseCcsr2hyb", "hipsparseCcsr2hyb", "library"); + subst("cusparseCcsrcolor", "hipsparseCcsrcolor", "library"); + subst("cusparseCcsrgeam", "hipsparseCcsrgeam", "library"); + subst("cusparseCcsrgeam2", "hipsparseCcsrgeam2", "library"); + subst("cusparseCcsrgeam2_bufferSizeExt", "hipsparseCcsrgeam2_bufferSizeExt", "library"); + subst("cusparseCcsrgemm", "hipsparseCcsrgemm", "library"); + subst("cusparseCcsrgemm2", "hipsparseCcsrgemm2", "library"); + subst("cusparseCcsrgemm2_bufferSizeExt", "hipsparseCcsrgemm2_bufferSizeExt", "library"); + subst("cusparseCcsric02", "hipsparseCcsric02", "library"); + subst("cusparseCcsric02_analysis", 
"hipsparseCcsric02_analysis", "library"); + subst("cusparseCcsric02_bufferSize", "hipsparseCcsric02_bufferSize", "library"); + subst("cusparseCcsric02_bufferSizeExt", "hipsparseCcsric02_bufferSizeExt", "library"); + subst("cusparseCcsrilu02", "hipsparseCcsrilu02", "library"); + subst("cusparseCcsrilu02_analysis", "hipsparseCcsrilu02_analysis", "library"); + subst("cusparseCcsrilu02_bufferSize", "hipsparseCcsrilu02_bufferSize", "library"); + subst("cusparseCcsrilu02_bufferSizeExt", "hipsparseCcsrilu02_bufferSizeExt", "library"); + subst("cusparseCcsrilu02_numericBoost", "hipsparseCcsrilu02_numericBoost", "library"); + subst("cusparseCcsrmm", "hipsparseCcsrmm", "library"); + subst("cusparseCcsrmm2", "hipsparseCcsrmm2", "library"); + subst("cusparseCcsrmv", "hipsparseCcsrmv", "library"); + subst("cusparseCcsrsm2_analysis", "hipsparseCcsrsm2_analysis", "library"); + subst("cusparseCcsrsm2_bufferSizeExt", "hipsparseCcsrsm2_bufferSizeExt", "library"); + subst("cusparseCcsrsm2_solve", "hipsparseCcsrsm2_solve", "library"); + subst("cusparseCcsrsv2_analysis", "hipsparseCcsrsv2_analysis", "library"); + subst("cusparseCcsrsv2_bufferSize", "hipsparseCcsrsv2_bufferSize", "library"); + subst("cusparseCcsrsv2_bufferSizeExt", "hipsparseCcsrsv2_bufferSizeExt", "library"); + subst("cusparseCcsrsv2_solve", "hipsparseCcsrsv2_solve", "library"); + subst("cusparseCcsru2csr", "hipsparseCcsru2csr", "library"); + subst("cusparseCcsru2csr_bufferSizeExt", "hipsparseCcsru2csr_bufferSizeExt", "library"); + subst("cusparseCdense2csc", "hipsparseCdense2csc", "library"); + subst("cusparseCdense2csr", "hipsparseCdense2csr", "library"); + subst("cusparseCdotci", "hipsparseCdotci", "library"); + subst("cusparseCdoti", "hipsparseCdoti", "library"); + subst("cusparseCgebsr2csr", "hipsparseCgebsr2csr", "library"); + subst("cusparseCgebsr2gebsc", "hipsparseCgebsr2gebsc", "library"); + subst("cusparseCgebsr2gebsc_bufferSize", "hipsparseCgebsr2gebsc_bufferSize", "library"); + subst("cusparseCgebsr2gebsr", "hipsparseCgebsr2gebsr", "library"); + subst("cusparseCgebsr2gebsr_bufferSize", "hipsparseCgebsr2gebsr_bufferSize", "library"); + subst("cusparseCgemmi", "hipsparseCgemmi", "library"); + subst("cusparseCgemvi", "hipsparseCgemvi", "library"); + subst("cusparseCgemvi_bufferSize", "hipsparseCgemvi_bufferSize", "library"); + subst("cusparseCgpsvInterleavedBatch", "hipsparseCgpsvInterleavedBatch", "library"); + subst("cusparseCgpsvInterleavedBatch_bufferSizeExt", "hipsparseCgpsvInterleavedBatch_bufferSizeExt", "library"); + subst("cusparseCgthr", "hipsparseCgthr", "library"); + subst("cusparseCgthrz", "hipsparseCgthrz", "library"); + subst("cusparseCgtsv2", "hipsparseCgtsv2", "library"); + subst("cusparseCgtsv2StridedBatch", "hipsparseCgtsv2StridedBatch", "library"); + subst("cusparseCgtsv2StridedBatch_bufferSizeExt", "hipsparseCgtsv2StridedBatch_bufferSizeExt", "library"); + subst("cusparseCgtsv2_bufferSizeExt", "hipsparseCgtsv2_bufferSizeExt", "library"); + subst("cusparseCgtsv2_nopivot", "hipsparseCgtsv2_nopivot", "library"); + subst("cusparseCgtsv2_nopivot_bufferSizeExt", "hipsparseCgtsv2_nopivot_bufferSizeExt", "library"); + subst("cusparseCgtsvInterleavedBatch", "hipsparseCgtsvInterleavedBatch", "library"); + subst("cusparseCgtsvInterleavedBatch_bufferSizeExt", "hipsparseCgtsvInterleavedBatch_bufferSizeExt", "library"); + subst("cusparseChyb2csr", "hipsparseChyb2csr", "library"); + subst("cusparseChybmv", "hipsparseChybmv", "library"); + subst("cusparseCnnz", "hipsparseCnnz", "library"); + subst("cusparseCnnz_compress", 
"hipsparseCnnz_compress", "library"); + subst("cusparseConstBlockedEllGet", "hipsparseConstBlockedEllGet", "library"); + subst("cusparseConstCooGet", "hipsparseConstCooGet", "library"); + subst("cusparseConstCscGet", "hipsparseConstCscGet", "library"); + subst("cusparseConstCsrGet", "hipsparseConstCsrGet", "library"); + subst("cusparseConstDnMatGet", "hipsparseConstDnMatGet", "library"); + subst("cusparseConstDnMatGetValues", "hipsparseConstDnMatGetValues", "library"); + subst("cusparseConstDnVecGet", "hipsparseConstDnVecGet", "library"); + subst("cusparseConstDnVecGetValues", "hipsparseConstDnVecGetValues", "library"); + subst("cusparseConstSpMatGetValues", "hipsparseConstSpMatGetValues", "library"); + subst("cusparseConstSpVecGet", "hipsparseConstSpVecGet", "library"); + subst("cusparseConstSpVecGetValues", "hipsparseConstSpVecGetValues", "library"); + subst("cusparseCooAoSGet", "hipsparseCooAoSGet", "library"); + subst("cusparseCooGet", "hipsparseCooGet", "library"); + subst("cusparseCooSetPointers", "hipsparseCooSetPointers", "library"); + subst("cusparseCooSetStridedBatch", "hipsparseCooSetStridedBatch", "library"); + subst("cusparseCopyMatDescr", "hipsparseCopyMatDescr", "library"); + subst("cusparseCreate", "hipsparseCreate", "library"); + subst("cusparseCreateBlockedEll", "hipsparseCreateBlockedEll", "library"); + subst("cusparseCreateBsric02Info", "hipsparseCreateBsric02Info", "library"); + subst("cusparseCreateBsrilu02Info", "hipsparseCreateBsrilu02Info", "library"); + subst("cusparseCreateBsrsm2Info", "hipsparseCreateBsrsm2Info", "library"); + subst("cusparseCreateBsrsv2Info", "hipsparseCreateBsrsv2Info", "library"); + subst("cusparseCreateColorInfo", "hipsparseCreateColorInfo", "library"); + subst("cusparseCreateConstBlockedEll", "hipsparseCreateConstBlockedEll", "library"); + subst("cusparseCreateConstCoo", "hipsparseCreateConstCoo", "library"); + subst("cusparseCreateConstCsc", "hipsparseCreateConstCsc", "library"); + subst("cusparseCreateConstCsr", "hipsparseCreateConstCsr", "library"); + subst("cusparseCreateConstDnMat", "hipsparseCreateConstDnMat", "library"); + subst("cusparseCreateConstDnVec", "hipsparseCreateConstDnVec", "library"); + subst("cusparseCreateConstSpVec", "hipsparseCreateConstSpVec", "library"); + subst("cusparseCreateCoo", "hipsparseCreateCoo", "library"); + subst("cusparseCreateCooAoS", "hipsparseCreateCooAoS", "library"); + subst("cusparseCreateCsc", "hipsparseCreateCsc", "library"); + subst("cusparseCreateCsr", "hipsparseCreateCsr", "library"); + subst("cusparseCreateCsrgemm2Info", "hipsparseCreateCsrgemm2Info", "library"); + subst("cusparseCreateCsric02Info", "hipsparseCreateCsric02Info", "library"); + subst("cusparseCreateCsrilu02Info", "hipsparseCreateCsrilu02Info", "library"); + subst("cusparseCreateCsrsm2Info", "hipsparseCreateCsrsm2Info", "library"); + subst("cusparseCreateCsrsv2Info", "hipsparseCreateCsrsv2Info", "library"); + subst("cusparseCreateCsru2csrInfo", "hipsparseCreateCsru2csrInfo", "library"); + subst("cusparseCreateDnMat", "hipsparseCreateDnMat", "library"); + subst("cusparseCreateDnVec", "hipsparseCreateDnVec", "library"); + subst("cusparseCreateHybMat", "hipsparseCreateHybMat", "library"); + subst("cusparseCreateIdentityPermutation", "hipsparseCreateIdentityPermutation", "library"); + subst("cusparseCreateMatDescr", "hipsparseCreateMatDescr", "library"); + subst("cusparseCreatePruneInfo", "hipsparseCreatePruneInfo", "library"); + subst("cusparseCreateSpVec", "hipsparseCreateSpVec", "library"); + subst("cusparseCscGet", 
"hipsparseCscGet", "library"); + subst("cusparseCscSetPointers", "hipsparseCscSetPointers", "library"); + subst("cusparseCsctr", "hipsparseCsctr", "library"); + subst("cusparseCsr2cscEx2", "hipsparseCsr2cscEx2", "library"); + subst("cusparseCsr2cscEx2_bufferSize", "hipsparseCsr2cscEx2_bufferSize", "library"); + subst("cusparseCsrGet", "hipsparseCsrGet", "library"); + subst("cusparseCsrSetPointers", "hipsparseCsrSetPointers", "library"); + subst("cusparseCsrSetStridedBatch", "hipsparseCsrSetStridedBatch", "library"); + subst("cusparseDaxpyi", "hipsparseDaxpyi", "library"); + subst("cusparseDbsr2csr", "hipsparseDbsr2csr", "library"); + subst("cusparseDbsric02", "hipsparseDbsric02", "library"); + subst("cusparseDbsric02_analysis", "hipsparseDbsric02_analysis", "library"); + subst("cusparseDbsric02_bufferSize", "hipsparseDbsric02_bufferSize", "library"); + subst("cusparseDbsrilu02", "hipsparseDbsrilu02", "library"); + subst("cusparseDbsrilu02_analysis", "hipsparseDbsrilu02_analysis", "library"); + subst("cusparseDbsrilu02_bufferSize", "hipsparseDbsrilu02_bufferSize", "library"); + subst("cusparseDbsrilu02_numericBoost", "hipsparseDbsrilu02_numericBoost", "library"); + subst("cusparseDbsrmm", "hipsparseDbsrmm", "library"); + subst("cusparseDbsrmv", "hipsparseDbsrmv", "library"); + subst("cusparseDbsrsm2_analysis", "hipsparseDbsrsm2_analysis", "library"); + subst("cusparseDbsrsm2_bufferSize", "hipsparseDbsrsm2_bufferSize", "library"); + subst("cusparseDbsrsm2_solve", "hipsparseDbsrsm2_solve", "library"); + subst("cusparseDbsrsv2_analysis", "hipsparseDbsrsv2_analysis", "library"); + subst("cusparseDbsrsv2_bufferSize", "hipsparseDbsrsv2_bufferSize", "library"); + subst("cusparseDbsrsv2_bufferSizeExt", "hipsparseDbsrsv2_bufferSizeExt", "library"); + subst("cusparseDbsrsv2_solve", "hipsparseDbsrsv2_solve", "library"); + subst("cusparseDbsrxmv", "hipsparseDbsrxmv", "library"); + subst("cusparseDcsc2dense", "hipsparseDcsc2dense", "library"); + subst("cusparseDcsr2bsr", "hipsparseDcsr2bsr", "library"); + subst("cusparseDcsr2csc", "hipsparseDcsr2csc", "library"); + subst("cusparseDcsr2csr_compress", "hipsparseDcsr2csr_compress", "library"); + subst("cusparseDcsr2csru", "hipsparseDcsr2csru", "library"); + subst("cusparseDcsr2dense", "hipsparseDcsr2dense", "library"); + subst("cusparseDcsr2gebsr", "hipsparseDcsr2gebsr", "library"); + subst("cusparseDcsr2gebsr_bufferSize", "hipsparseDcsr2gebsr_bufferSize", "library"); + subst("cusparseDcsr2hyb", "hipsparseDcsr2hyb", "library"); + subst("cusparseDcsrcolor", "hipsparseDcsrcolor", "library"); + subst("cusparseDcsrgeam", "hipsparseDcsrgeam", "library"); + subst("cusparseDcsrgeam2", "hipsparseDcsrgeam2", "library"); + subst("cusparseDcsrgeam2_bufferSizeExt", "hipsparseDcsrgeam2_bufferSizeExt", "library"); + subst("cusparseDcsrgemm", "hipsparseDcsrgemm", "library"); + subst("cusparseDcsrgemm2", "hipsparseDcsrgemm2", "library"); + subst("cusparseDcsrgemm2_bufferSizeExt", "hipsparseDcsrgemm2_bufferSizeExt", "library"); + subst("cusparseDcsric02", "hipsparseDcsric02", "library"); + subst("cusparseDcsric02_analysis", "hipsparseDcsric02_analysis", "library"); + subst("cusparseDcsric02_bufferSize", "hipsparseDcsric02_bufferSize", "library"); + subst("cusparseDcsric02_bufferSizeExt", "hipsparseDcsric02_bufferSizeExt", "library"); + subst("cusparseDcsrilu02", "hipsparseDcsrilu02", "library"); + subst("cusparseDcsrilu02_analysis", "hipsparseDcsrilu02_analysis", "library"); + subst("cusparseDcsrilu02_bufferSize", "hipsparseDcsrilu02_bufferSize", "library"); + 
subst("cusparseDcsrilu02_bufferSizeExt", "hipsparseDcsrilu02_bufferSizeExt", "library"); + subst("cusparseDcsrilu02_numericBoost", "hipsparseDcsrilu02_numericBoost", "library"); + subst("cusparseDcsrmm", "hipsparseDcsrmm", "library"); + subst("cusparseDcsrmm2", "hipsparseDcsrmm2", "library"); + subst("cusparseDcsrmv", "hipsparseDcsrmv", "library"); + subst("cusparseDcsrsm2_analysis", "hipsparseDcsrsm2_analysis", "library"); + subst("cusparseDcsrsm2_bufferSizeExt", "hipsparseDcsrsm2_bufferSizeExt", "library"); + subst("cusparseDcsrsm2_solve", "hipsparseDcsrsm2_solve", "library"); + subst("cusparseDcsrsv2_analysis", "hipsparseDcsrsv2_analysis", "library"); + subst("cusparseDcsrsv2_bufferSize", "hipsparseDcsrsv2_bufferSize", "library"); + subst("cusparseDcsrsv2_bufferSizeExt", "hipsparseDcsrsv2_bufferSizeExt", "library"); + subst("cusparseDcsrsv2_solve", "hipsparseDcsrsv2_solve", "library"); + subst("cusparseDcsru2csr", "hipsparseDcsru2csr", "library"); + subst("cusparseDcsru2csr_bufferSizeExt", "hipsparseDcsru2csr_bufferSizeExt", "library"); + subst("cusparseDdense2csc", "hipsparseDdense2csc", "library"); + subst("cusparseDdense2csr", "hipsparseDdense2csr", "library"); + subst("cusparseDdoti", "hipsparseDdoti", "library"); + subst("cusparseDenseToSparse_analysis", "hipsparseDenseToSparse_analysis", "library"); + subst("cusparseDenseToSparse_bufferSize", "hipsparseDenseToSparse_bufferSize", "library"); + subst("cusparseDenseToSparse_convert", "hipsparseDenseToSparse_convert", "library"); + subst("cusparseDestroy", "hipsparseDestroy", "library"); + subst("cusparseDestroyBsric02Info", "hipsparseDestroyBsric02Info", "library"); + subst("cusparseDestroyBsrilu02Info", "hipsparseDestroyBsrilu02Info", "library"); + subst("cusparseDestroyBsrsm2Info", "hipsparseDestroyBsrsm2Info", "library"); + subst("cusparseDestroyBsrsv2Info", "hipsparseDestroyBsrsv2Info", "library"); + subst("cusparseDestroyColorInfo", "hipsparseDestroyColorInfo", "library"); + subst("cusparseDestroyCsrgemm2Info", "hipsparseDestroyCsrgemm2Info", "library"); + subst("cusparseDestroyCsric02Info", "hipsparseDestroyCsric02Info", "library"); + subst("cusparseDestroyCsrilu02Info", "hipsparseDestroyCsrilu02Info", "library"); + subst("cusparseDestroyCsrsm2Info", "hipsparseDestroyCsrsm2Info", "library"); + subst("cusparseDestroyCsrsv2Info", "hipsparseDestroyCsrsv2Info", "library"); + subst("cusparseDestroyCsru2csrInfo", "hipsparseDestroyCsru2csrInfo", "library"); + subst("cusparseDestroyDnMat", "hipsparseDestroyDnMat", "library"); + subst("cusparseDestroyDnVec", "hipsparseDestroyDnVec", "library"); + subst("cusparseDestroyHybMat", "hipsparseDestroyHybMat", "library"); + subst("cusparseDestroyMatDescr", "hipsparseDestroyMatDescr", "library"); + subst("cusparseDestroyPruneInfo", "hipsparseDestroyPruneInfo", "library"); + subst("cusparseDestroySpMat", "hipsparseDestroySpMat", "library"); + subst("cusparseDestroySpVec", "hipsparseDestroySpVec", "library"); + subst("cusparseDgebsr2csr", "hipsparseDgebsr2csr", "library"); + subst("cusparseDgebsr2gebsc", "hipsparseDgebsr2gebsc", "library"); + subst("cusparseDgebsr2gebsc_bufferSize", "hipsparseDgebsr2gebsc_bufferSize", "library"); + subst("cusparseDgebsr2gebsr", "hipsparseDgebsr2gebsr", "library"); + subst("cusparseDgebsr2gebsr_bufferSize", "hipsparseDgebsr2gebsr_bufferSize", "library"); + subst("cusparseDgemmi", "hipsparseDgemmi", "library"); + subst("cusparseDgemvi", "hipsparseDgemvi", "library"); + subst("cusparseDgemvi_bufferSize", "hipsparseDgemvi_bufferSize", "library"); + 
subst("cusparseDgpsvInterleavedBatch", "hipsparseDgpsvInterleavedBatch", "library"); + subst("cusparseDgpsvInterleavedBatch_bufferSizeExt", "hipsparseDgpsvInterleavedBatch_bufferSizeExt", "library"); + subst("cusparseDgthr", "hipsparseDgthr", "library"); + subst("cusparseDgthrz", "hipsparseDgthrz", "library"); + subst("cusparseDgtsv2", "hipsparseDgtsv2", "library"); + subst("cusparseDgtsv2StridedBatch", "hipsparseDgtsv2StridedBatch", "library"); + subst("cusparseDgtsv2StridedBatch_bufferSizeExt", "hipsparseDgtsv2StridedBatch_bufferSizeExt", "library"); + subst("cusparseDgtsv2_bufferSizeExt", "hipsparseDgtsv2_bufferSizeExt", "library"); + subst("cusparseDgtsv2_nopivot", "hipsparseDgtsv2_nopivot", "library"); + subst("cusparseDgtsv2_nopivot_bufferSizeExt", "hipsparseDgtsv2_nopivot_bufferSizeExt", "library"); + subst("cusparseDgtsvInterleavedBatch", "hipsparseDgtsvInterleavedBatch", "library"); + subst("cusparseDgtsvInterleavedBatch_bufferSizeExt", "hipsparseDgtsvInterleavedBatch_bufferSizeExt", "library"); + subst("cusparseDhyb2csr", "hipsparseDhyb2csr", "library"); + subst("cusparseDhybmv", "hipsparseDhybmv", "library"); + subst("cusparseDnMatGet", "hipsparseDnMatGet", "library"); + subst("cusparseDnMatGetStridedBatch", "hipsparseDnMatGetStridedBatch", "library"); + subst("cusparseDnMatGetValues", "hipsparseDnMatGetValues", "library"); + subst("cusparseDnMatSetStridedBatch", "hipsparseDnMatSetStridedBatch", "library"); + subst("cusparseDnMatSetValues", "hipsparseDnMatSetValues", "library"); + subst("cusparseDnVecGet", "hipsparseDnVecGet", "library"); + subst("cusparseDnVecGetValues", "hipsparseDnVecGetValues", "library"); + subst("cusparseDnVecSetValues", "hipsparseDnVecSetValues", "library"); + subst("cusparseDnnz", "hipsparseDnnz", "library"); + subst("cusparseDnnz_compress", "hipsparseDnnz_compress", "library"); + subst("cusparseDpruneCsr2csr", "hipsparseDpruneCsr2csr", "library"); + subst("cusparseDpruneCsr2csrByPercentage", "hipsparseDpruneCsr2csrByPercentage", "library"); + subst("cusparseDpruneCsr2csrByPercentage_bufferSizeExt", "hipsparseDpruneCsr2csrByPercentage_bufferSizeExt", "library"); + subst("cusparseDpruneCsr2csrNnz", "hipsparseDpruneCsr2csrNnz", "library"); + subst("cusparseDpruneCsr2csrNnzByPercentage", "hipsparseDpruneCsr2csrNnzByPercentage", "library"); + subst("cusparseDpruneCsr2csr_bufferSizeExt", "hipsparseDpruneCsr2csr_bufferSizeExt", "library"); + subst("cusparseDpruneDense2csr", "hipsparseDpruneDense2csr", "library"); + subst("cusparseDpruneDense2csrByPercentage", "hipsparseDpruneDense2csrByPercentage", "library"); + subst("cusparseDpruneDense2csrByPercentage_bufferSizeExt", "hipsparseDpruneDense2csrByPercentage_bufferSizeExt", "library"); + subst("cusparseDpruneDense2csrNnz", "hipsparseDpruneDense2csrNnz", "library"); + subst("cusparseDpruneDense2csrNnzByPercentage", "hipsparseDpruneDense2csrNnzByPercentage", "library"); + subst("cusparseDpruneDense2csr_bufferSizeExt", "hipsparseDpruneDense2csr_bufferSizeExt", "library"); + subst("cusparseDroti", "hipsparseDroti", "library"); + subst("cusparseDsctr", "hipsparseDsctr", "library"); + subst("cusparseGather", "hipsparseGather", "library"); + subst("cusparseGetErrorName", "hipsparseGetErrorName", "library"); + subst("cusparseGetErrorString", "hipsparseGetErrorString", "library"); + subst("cusparseGetMatDiagType", "hipsparseGetMatDiagType", "library"); + subst("cusparseGetMatFillMode", "hipsparseGetMatFillMode", "library"); + subst("cusparseGetMatIndexBase", "hipsparseGetMatIndexBase", "library"); + 
subst("cusparseGetMatType", "hipsparseGetMatType", "library"); + subst("cusparseGetPointerMode", "hipsparseGetPointerMode", "library"); + subst("cusparseGetStream", "hipsparseGetStream", "library"); + subst("cusparseGetVersion", "hipsparseGetVersion", "library"); + subst("cusparseRot", "hipsparseRot", "library"); + subst("cusparseSDDMM", "hipsparseSDDMM", "library"); + subst("cusparseSDDMM_bufferSize", "hipsparseSDDMM_bufferSize", "library"); + subst("cusparseSDDMM_preprocess", "hipsparseSDDMM_preprocess", "library"); + subst("cusparseSaxpyi", "hipsparseSaxpyi", "library"); + subst("cusparseSbsr2csr", "hipsparseSbsr2csr", "library"); + subst("cusparseSbsric02", "hipsparseSbsric02", "library"); + subst("cusparseSbsric02_analysis", "hipsparseSbsric02_analysis", "library"); + subst("cusparseSbsric02_bufferSize", "hipsparseSbsric02_bufferSize", "library"); + subst("cusparseSbsrilu02", "hipsparseSbsrilu02", "library"); + subst("cusparseSbsrilu02_analysis", "hipsparseSbsrilu02_analysis", "library"); + subst("cusparseSbsrilu02_bufferSize", "hipsparseSbsrilu02_bufferSize", "library"); + subst("cusparseSbsrilu02_numericBoost", "hipsparseSbsrilu02_numericBoost", "library"); + subst("cusparseSbsrmm", "hipsparseSbsrmm", "library"); + subst("cusparseSbsrmv", "hipsparseSbsrmv", "library"); + subst("cusparseSbsrsm2_analysis", "hipsparseSbsrsm2_analysis", "library"); + subst("cusparseSbsrsm2_bufferSize", "hipsparseSbsrsm2_bufferSize", "library"); + subst("cusparseSbsrsm2_solve", "hipsparseSbsrsm2_solve", "library"); + subst("cusparseSbsrsv2_analysis", "hipsparseSbsrsv2_analysis", "library"); + subst("cusparseSbsrsv2_bufferSize", "hipsparseSbsrsv2_bufferSize", "library"); + subst("cusparseSbsrsv2_bufferSizeExt", "hipsparseSbsrsv2_bufferSizeExt", "library"); + subst("cusparseSbsrsv2_solve", "hipsparseSbsrsv2_solve", "library"); + subst("cusparseSbsrxmv", "hipsparseSbsrxmv", "library"); + subst("cusparseScatter", "hipsparseScatter", "library"); + subst("cusparseScsc2dense", "hipsparseScsc2dense", "library"); + subst("cusparseScsr2bsr", "hipsparseScsr2bsr", "library"); + subst("cusparseScsr2csc", "hipsparseScsr2csc", "library"); + subst("cusparseScsr2csr_compress", "hipsparseScsr2csr_compress", "library"); + subst("cusparseScsr2csru", "hipsparseScsr2csru", "library"); + subst("cusparseScsr2dense", "hipsparseScsr2dense", "library"); + subst("cusparseScsr2gebsr", "hipsparseScsr2gebsr", "library"); + subst("cusparseScsr2gebsr_bufferSize", "hipsparseScsr2gebsr_bufferSize", "library"); + subst("cusparseScsr2hyb", "hipsparseScsr2hyb", "library"); + subst("cusparseScsrcolor", "hipsparseScsrcolor", "library"); + subst("cusparseScsrgeam", "hipsparseScsrgeam", "library"); + subst("cusparseScsrgeam2", "hipsparseScsrgeam2", "library"); + subst("cusparseScsrgeam2_bufferSizeExt", "hipsparseScsrgeam2_bufferSizeExt", "library"); + subst("cusparseScsrgemm", "hipsparseScsrgemm", "library"); + subst("cusparseScsrgemm2", "hipsparseScsrgemm2", "library"); + subst("cusparseScsrgemm2_bufferSizeExt", "hipsparseScsrgemm2_bufferSizeExt", "library"); + subst("cusparseScsric02", "hipsparseScsric02", "library"); + subst("cusparseScsric02_analysis", "hipsparseScsric02_analysis", "library"); + subst("cusparseScsric02_bufferSize", "hipsparseScsric02_bufferSize", "library"); + subst("cusparseScsric02_bufferSizeExt", "hipsparseScsric02_bufferSizeExt", "library"); + subst("cusparseScsrilu02", "hipsparseScsrilu02", "library"); + subst("cusparseScsrilu02_analysis", "hipsparseScsrilu02_analysis", "library"); + 
subst("cusparseScsrilu02_bufferSize", "hipsparseScsrilu02_bufferSize", "library"); + subst("cusparseScsrilu02_bufferSizeExt", "hipsparseScsrilu02_bufferSizeExt", "library"); + subst("cusparseScsrilu02_numericBoost", "hipsparseScsrilu02_numericBoost", "library"); + subst("cusparseScsrmm", "hipsparseScsrmm", "library"); + subst("cusparseScsrmm2", "hipsparseScsrmm2", "library"); + subst("cusparseScsrmv", "hipsparseScsrmv", "library"); + subst("cusparseScsrsm2_analysis", "hipsparseScsrsm2_analysis", "library"); + subst("cusparseScsrsm2_bufferSizeExt", "hipsparseScsrsm2_bufferSizeExt", "library"); + subst("cusparseScsrsm2_solve", "hipsparseScsrsm2_solve", "library"); + subst("cusparseScsrsv2_analysis", "hipsparseScsrsv2_analysis", "library"); + subst("cusparseScsrsv2_bufferSize", "hipsparseScsrsv2_bufferSize", "library"); + subst("cusparseScsrsv2_bufferSizeExt", "hipsparseScsrsv2_bufferSizeExt", "library"); + subst("cusparseScsrsv2_solve", "hipsparseScsrsv2_solve", "library"); + subst("cusparseScsru2csr", "hipsparseScsru2csr", "library"); + subst("cusparseScsru2csr_bufferSizeExt", "hipsparseScsru2csr_bufferSizeExt", "library"); + subst("cusparseSdense2csc", "hipsparseSdense2csc", "library"); + subst("cusparseSdense2csr", "hipsparseSdense2csr", "library"); + subst("cusparseSdoti", "hipsparseSdoti", "library"); + subst("cusparseSetMatDiagType", "hipsparseSetMatDiagType", "library"); + subst("cusparseSetMatFillMode", "hipsparseSetMatFillMode", "library"); + subst("cusparseSetMatIndexBase", "hipsparseSetMatIndexBase", "library"); + subst("cusparseSetMatType", "hipsparseSetMatType", "library"); + subst("cusparseSetPointerMode", "hipsparseSetPointerMode", "library"); + subst("cusparseSetStream", "hipsparseSetStream", "library"); + subst("cusparseSgebsr2csr", "hipsparseSgebsr2csr", "library"); + subst("cusparseSgebsr2gebsc", "hipsparseSgebsr2gebsc", "library"); + subst("cusparseSgebsr2gebsc_bufferSize", "hipsparseSgebsr2gebsc_bufferSize", "library"); + subst("cusparseSgebsr2gebsr", "hipsparseSgebsr2gebsr", "library"); + subst("cusparseSgebsr2gebsr_bufferSize", "hipsparseSgebsr2gebsr_bufferSize", "library"); + subst("cusparseSgemmi", "hipsparseSgemmi", "library"); + subst("cusparseSgemvi", "hipsparseSgemvi", "library"); + subst("cusparseSgemvi_bufferSize", "hipsparseSgemvi_bufferSize", "library"); + subst("cusparseSgpsvInterleavedBatch", "hipsparseSgpsvInterleavedBatch", "library"); + subst("cusparseSgpsvInterleavedBatch_bufferSizeExt", "hipsparseSgpsvInterleavedBatch_bufferSizeExt", "library"); + subst("cusparseSgthr", "hipsparseSgthr", "library"); + subst("cusparseSgthrz", "hipsparseSgthrz", "library"); + subst("cusparseSgtsv2", "hipsparseSgtsv2", "library"); + subst("cusparseSgtsv2StridedBatch", "hipsparseSgtsv2StridedBatch", "library"); + subst("cusparseSgtsv2StridedBatch_bufferSizeExt", "hipsparseSgtsv2StridedBatch_bufferSizeExt", "library"); + subst("cusparseSgtsv2_bufferSizeExt", "hipsparseSgtsv2_bufferSizeExt", "library"); + subst("cusparseSgtsv2_nopivot", "hipsparseSgtsv2_nopivot", "library"); + subst("cusparseSgtsv2_nopivot_bufferSizeExt", "hipsparseSgtsv2_nopivot_bufferSizeExt", "library"); + subst("cusparseSgtsvInterleavedBatch", "hipsparseSgtsvInterleavedBatch", "library"); + subst("cusparseSgtsvInterleavedBatch_bufferSizeExt", "hipsparseSgtsvInterleavedBatch_bufferSizeExt", "library"); + subst("cusparseShyb2csr", "hipsparseShyb2csr", "library"); + subst("cusparseShybmv", "hipsparseShybmv", "library"); + subst("cusparseSnnz", "hipsparseSnnz", "library"); + subst("cusparseSnnz_compress", 
"hipsparseSnnz_compress", "library"); + subst("cusparseSpGEMM_compute", "hipsparseSpGEMM_compute", "library"); + subst("cusparseSpGEMM_copy", "hipsparseSpGEMM_copy", "library"); + subst("cusparseSpGEMM_createDescr", "hipsparseSpGEMM_createDescr", "library"); + subst("cusparseSpGEMM_destroyDescr", "hipsparseSpGEMM_destroyDescr", "library"); + subst("cusparseSpGEMM_workEstimation", "hipsparseSpGEMM_workEstimation", "library"); + subst("cusparseSpGEMMreuse_compute", "hipsparseSpGEMMreuse_compute", "library"); + subst("cusparseSpGEMMreuse_copy", "hipsparseSpGEMMreuse_copy", "library"); + subst("cusparseSpGEMMreuse_nnz", "hipsparseSpGEMMreuse_nnz", "library"); + subst("cusparseSpGEMMreuse_workEstimation", "hipsparseSpGEMMreuse_workEstimation", "library"); + subst("cusparseSpMM", "hipsparseSpMM", "library"); + subst("cusparseSpMM_bufferSize", "hipsparseSpMM_bufferSize", "library"); + subst("cusparseSpMM_preprocess", "hipsparseSpMM_preprocess", "library"); + subst("cusparseSpMV", "hipsparseSpMV", "library"); + subst("cusparseSpMV_bufferSize", "hipsparseSpMV_bufferSize", "library"); + subst("cusparseSpMV_preprocess", "hipsparseSpMV_preprocess", "library"); + subst("cusparseSpMatGetAttribute", "hipsparseSpMatGetAttribute", "library"); + subst("cusparseSpMatGetFormat", "hipsparseSpMatGetFormat", "library"); + subst("cusparseSpMatGetIndexBase", "hipsparseSpMatGetIndexBase", "library"); + subst("cusparseSpMatGetSize", "hipsparseSpMatGetSize", "library"); + subst("cusparseSpMatGetStridedBatch", "hipsparseSpMatGetStridedBatch", "library"); + subst("cusparseSpMatGetValues", "hipsparseSpMatGetValues", "library"); + subst("cusparseSpMatSetAttribute", "hipsparseSpMatSetAttribute", "library"); + subst("cusparseSpMatSetStridedBatch", "hipsparseSpMatSetStridedBatch", "library"); + subst("cusparseSpMatSetValues", "hipsparseSpMatSetValues", "library"); + subst("cusparseSpSM_analysis", "hipsparseSpSM_analysis", "library"); + subst("cusparseSpSM_bufferSize", "hipsparseSpSM_bufferSize", "library"); + subst("cusparseSpSM_createDescr", "hipsparseSpSM_createDescr", "library"); + subst("cusparseSpSM_destroyDescr", "hipsparseSpSM_destroyDescr", "library"); + subst("cusparseSpSM_solve", "hipsparseSpSM_solve", "library"); + subst("cusparseSpSV_analysis", "hipsparseSpSV_analysis", "library"); + subst("cusparseSpSV_bufferSize", "hipsparseSpSV_bufferSize", "library"); + subst("cusparseSpSV_createDescr", "hipsparseSpSV_createDescr", "library"); + subst("cusparseSpSV_destroyDescr", "hipsparseSpSV_destroyDescr", "library"); + subst("cusparseSpSV_solve", "hipsparseSpSV_solve", "library"); + subst("cusparseSpVV", "hipsparseSpVV", "library"); + subst("cusparseSpVV_bufferSize", "hipsparseSpVV_bufferSize", "library"); + subst("cusparseSpVecGet", "hipsparseSpVecGet", "library"); + subst("cusparseSpVecGetIndexBase", "hipsparseSpVecGetIndexBase", "library"); + subst("cusparseSpVecGetValues", "hipsparseSpVecGetValues", "library"); + subst("cusparseSpVecSetValues", "hipsparseSpVecSetValues", "library"); + subst("cusparseSparseToDense", "hipsparseSparseToDense", "library"); + subst("cusparseSparseToDense_bufferSize", "hipsparseSparseToDense_bufferSize", "library"); + subst("cusparseSpruneCsr2csr", "hipsparseSpruneCsr2csr", "library"); + subst("cusparseSpruneCsr2csrByPercentage", "hipsparseSpruneCsr2csrByPercentage", "library"); + subst("cusparseSpruneCsr2csrByPercentage_bufferSizeExt", "hipsparseSpruneCsr2csrByPercentage_bufferSizeExt", "library"); + subst("cusparseSpruneCsr2csrNnz", "hipsparseSpruneCsr2csrNnz", "library"); + 
subst("cusparseSpruneCsr2csrNnzByPercentage", "hipsparseSpruneCsr2csrNnzByPercentage", "library"); + subst("cusparseSpruneCsr2csr_bufferSizeExt", "hipsparseSpruneCsr2csr_bufferSizeExt", "library"); + subst("cusparseSpruneDense2csr", "hipsparseSpruneDense2csr", "library"); + subst("cusparseSpruneDense2csrByPercentage", "hipsparseSpruneDense2csrByPercentage", "library"); + subst("cusparseSpruneDense2csrByPercentage_bufferSizeExt", "hipsparseSpruneDense2csrByPercentage_bufferSizeExt", "library"); + subst("cusparseSpruneDense2csrNnz", "hipsparseSpruneDense2csrNnz", "library"); + subst("cusparseSpruneDense2csrNnzByPercentage", "hipsparseSpruneDense2csrNnzByPercentage", "library"); + subst("cusparseSpruneDense2csr_bufferSizeExt", "hipsparseSpruneDense2csr_bufferSizeExt", "library"); + subst("cusparseSroti", "hipsparseSroti", "library"); + subst("cusparseSsctr", "hipsparseSsctr", "library"); + subst("cusparseXbsric02_zeroPivot", "hipsparseXbsric02_zeroPivot", "library"); + subst("cusparseXbsrilu02_zeroPivot", "hipsparseXbsrilu02_zeroPivot", "library"); + subst("cusparseXbsrsm2_zeroPivot", "hipsparseXbsrsm2_zeroPivot", "library"); + subst("cusparseXbsrsv2_zeroPivot", "hipsparseXbsrsv2_zeroPivot", "library"); + subst("cusparseXcoo2csr", "hipsparseXcoo2csr", "library"); + subst("cusparseXcoosortByColumn", "hipsparseXcoosortByColumn", "library"); + subst("cusparseXcoosortByRow", "hipsparseXcoosortByRow", "library"); + subst("cusparseXcoosort_bufferSizeExt", "hipsparseXcoosort_bufferSizeExt", "library"); + subst("cusparseXcscsort", "hipsparseXcscsort", "library"); + subst("cusparseXcscsort_bufferSizeExt", "hipsparseXcscsort_bufferSizeExt", "library"); + subst("cusparseXcsr2bsrNnz", "hipsparseXcsr2bsrNnz", "library"); + subst("cusparseXcsr2coo", "hipsparseXcsr2coo", "library"); + subst("cusparseXcsr2gebsrNnz", "hipsparseXcsr2gebsrNnz", "library"); + subst("cusparseXcsrgeam2Nnz", "hipsparseXcsrgeam2Nnz", "library"); + subst("cusparseXcsrgeamNnz", "hipsparseXcsrgeamNnz", "library"); + subst("cusparseXcsrgemm2Nnz", "hipsparseXcsrgemm2Nnz", "library"); + subst("cusparseXcsrgemmNnz", "hipsparseXcsrgemmNnz", "library"); + subst("cusparseXcsric02_zeroPivot", "hipsparseXcsric02_zeroPivot", "library"); + subst("cusparseXcsrilu02_zeroPivot", "hipsparseXcsrilu02_zeroPivot", "library"); + subst("cusparseXcsrsm2_zeroPivot", "hipsparseXcsrsm2_zeroPivot", "library"); + subst("cusparseXcsrsort", "hipsparseXcsrsort", "library"); + subst("cusparseXcsrsort_bufferSizeExt", "hipsparseXcsrsort_bufferSizeExt", "library"); + subst("cusparseXcsrsv2_zeroPivot", "hipsparseXcsrsv2_zeroPivot", "library"); + subst("cusparseXgebsr2gebsrNnz", "hipsparseXgebsr2gebsrNnz", "library"); + subst("cusparseZaxpyi", "hipsparseZaxpyi", "library"); + subst("cusparseZbsr2csr", "hipsparseZbsr2csr", "library"); + subst("cusparseZbsric02", "hipsparseZbsric02", "library"); + subst("cusparseZbsric02_analysis", "hipsparseZbsric02_analysis", "library"); + subst("cusparseZbsric02_bufferSize", "hipsparseZbsric02_bufferSize", "library"); + subst("cusparseZbsrilu02", "hipsparseZbsrilu02", "library"); + subst("cusparseZbsrilu02_analysis", "hipsparseZbsrilu02_analysis", "library"); + subst("cusparseZbsrilu02_bufferSize", "hipsparseZbsrilu02_bufferSize", "library"); + subst("cusparseZbsrilu02_numericBoost", "hipsparseZbsrilu02_numericBoost", "library"); + subst("cusparseZbsrmm", "hipsparseZbsrmm", "library"); + subst("cusparseZbsrmv", "hipsparseZbsrmv", "library"); + subst("cusparseZbsrsm2_analysis", "hipsparseZbsrsm2_analysis", "library"); + 
subst("cusparseZbsrsm2_bufferSize", "hipsparseZbsrsm2_bufferSize", "library"); + subst("cusparseZbsrsm2_solve", "hipsparseZbsrsm2_solve", "library"); + subst("cusparseZbsrsv2_analysis", "hipsparseZbsrsv2_analysis", "library"); + subst("cusparseZbsrsv2_bufferSize", "hipsparseZbsrsv2_bufferSize", "library"); + subst("cusparseZbsrsv2_bufferSizeExt", "hipsparseZbsrsv2_bufferSizeExt", "library"); + subst("cusparseZbsrsv2_solve", "hipsparseZbsrsv2_solve", "library"); + subst("cusparseZbsrxmv", "hipsparseZbsrxmv", "library"); + subst("cusparseZcsc2dense", "hipsparseZcsc2dense", "library"); + subst("cusparseZcsr2bsr", "hipsparseZcsr2bsr", "library"); + subst("cusparseZcsr2csc", "hipsparseZcsr2csc", "library"); + subst("cusparseZcsr2csr_compress", "hipsparseZcsr2csr_compress", "library"); + subst("cusparseZcsr2csru", "hipsparseZcsr2csru", "library"); + subst("cusparseZcsr2dense", "hipsparseZcsr2dense", "library"); + subst("cusparseZcsr2gebsr", "hipsparseZcsr2gebsr", "library"); + subst("cusparseZcsr2gebsr_bufferSize", "hipsparseZcsr2gebsr_bufferSize", "library"); + subst("cusparseZcsr2hyb", "hipsparseZcsr2hyb", "library"); + subst("cusparseZcsrcolor", "hipsparseZcsrcolor", "library"); + subst("cusparseZcsrgeam", "hipsparseZcsrgeam", "library"); + subst("cusparseZcsrgeam2", "hipsparseZcsrgeam2", "library"); + subst("cusparseZcsrgeam2_bufferSizeExt", "hipsparseZcsrgeam2_bufferSizeExt", "library"); + subst("cusparseZcsrgemm", "hipsparseZcsrgemm", "library"); + subst("cusparseZcsrgemm2", "hipsparseZcsrgemm2", "library"); + subst("cusparseZcsrgemm2_bufferSizeExt", "hipsparseZcsrgemm2_bufferSizeExt", "library"); + subst("cusparseZcsric02", "hipsparseZcsric02", "library"); + subst("cusparseZcsric02_analysis", "hipsparseZcsric02_analysis", "library"); + subst("cusparseZcsric02_bufferSize", "hipsparseZcsric02_bufferSize", "library"); + subst("cusparseZcsric02_bufferSizeExt", "hipsparseZcsric02_bufferSizeExt", "library"); + subst("cusparseZcsrilu02", "hipsparseZcsrilu02", "library"); + subst("cusparseZcsrilu02_analysis", "hipsparseZcsrilu02_analysis", "library"); + subst("cusparseZcsrilu02_bufferSize", "hipsparseZcsrilu02_bufferSize", "library"); + subst("cusparseZcsrilu02_bufferSizeExt", "hipsparseZcsrilu02_bufferSizeExt", "library"); + subst("cusparseZcsrilu02_numericBoost", "hipsparseZcsrilu02_numericBoost", "library"); + subst("cusparseZcsrmm", "hipsparseZcsrmm", "library"); + subst("cusparseZcsrmm2", "hipsparseZcsrmm2", "library"); + subst("cusparseZcsrmv", "hipsparseZcsrmv", "library"); + subst("cusparseZcsrsm2_analysis", "hipsparseZcsrsm2_analysis", "library"); + subst("cusparseZcsrsm2_bufferSizeExt", "hipsparseZcsrsm2_bufferSizeExt", "library"); + subst("cusparseZcsrsm2_solve", "hipsparseZcsrsm2_solve", "library"); + subst("cusparseZcsrsv2_analysis", "hipsparseZcsrsv2_analysis", "library"); + subst("cusparseZcsrsv2_bufferSize", "hipsparseZcsrsv2_bufferSize", "library"); + subst("cusparseZcsrsv2_bufferSizeExt", "hipsparseZcsrsv2_bufferSizeExt", "library"); + subst("cusparseZcsrsv2_solve", "hipsparseZcsrsv2_solve", "library"); + subst("cusparseZcsru2csr", "hipsparseZcsru2csr", "library"); + subst("cusparseZcsru2csr_bufferSizeExt", "hipsparseZcsru2csr_bufferSizeExt", "library"); + subst("cusparseZdense2csc", "hipsparseZdense2csc", "library"); + subst("cusparseZdense2csr", "hipsparseZdense2csr", "library"); + subst("cusparseZdotci", "hipsparseZdotci", "library"); + subst("cusparseZdoti", "hipsparseZdoti", "library"); + subst("cusparseZgebsr2csr", "hipsparseZgebsr2csr", "library"); + 
subst("cusparseZgebsr2gebsc", "hipsparseZgebsr2gebsc", "library"); + subst("cusparseZgebsr2gebsc_bufferSize", "hipsparseZgebsr2gebsc_bufferSize", "library"); + subst("cusparseZgebsr2gebsr", "hipsparseZgebsr2gebsr", "library"); + subst("cusparseZgebsr2gebsr_bufferSize", "hipsparseZgebsr2gebsr_bufferSize", "library"); + subst("cusparseZgemmi", "hipsparseZgemmi", "library"); + subst("cusparseZgemvi", "hipsparseZgemvi", "library"); + subst("cusparseZgemvi_bufferSize", "hipsparseZgemvi_bufferSize", "library"); + subst("cusparseZgpsvInterleavedBatch", "hipsparseZgpsvInterleavedBatch", "library"); + subst("cusparseZgpsvInterleavedBatch_bufferSizeExt", "hipsparseZgpsvInterleavedBatch_bufferSizeExt", "library"); + subst("cusparseZgthr", "hipsparseZgthr", "library"); + subst("cusparseZgthrz", "hipsparseZgthrz", "library"); + subst("cusparseZgtsv2", "hipsparseZgtsv2", "library"); + subst("cusparseZgtsv2StridedBatch", "hipsparseZgtsv2StridedBatch", "library"); + subst("cusparseZgtsv2StridedBatch_bufferSizeExt", "hipsparseZgtsv2StridedBatch_bufferSizeExt", "library"); + subst("cusparseZgtsv2_bufferSizeExt", "hipsparseZgtsv2_bufferSizeExt", "library"); + subst("cusparseZgtsv2_nopivot", "hipsparseZgtsv2_nopivot", "library"); + subst("cusparseZgtsv2_nopivot_bufferSizeExt", "hipsparseZgtsv2_nopivot_bufferSizeExt", "library"); + subst("cusparseZgtsvInterleavedBatch", "hipsparseZgtsvInterleavedBatch", "library"); + subst("cusparseZgtsvInterleavedBatch_bufferSizeExt", "hipsparseZgtsvInterleavedBatch_bufferSizeExt", "library"); + subst("cusparseZhyb2csr", "hipsparseZhyb2csr", "library"); + subst("cusparseZhybmv", "hipsparseZhybmv", "library"); + subst("cusparseZnnz", "hipsparseZnnz", "library"); + subst("cusparseZnnz_compress", "hipsparseZnnz_compress", "library"); + subst("cusparseZsctr", "hipsparseZsctr", "library"); + subst("nvrtcAddNameExpression", "hiprtcAddNameExpression", "library"); + subst("nvrtcCompileProgram", "hiprtcCompileProgram", "library"); + subst("nvrtcCreateProgram", "hiprtcCreateProgram", "library"); + subst("nvrtcDestroyProgram", "hiprtcDestroyProgram", "library"); + subst("nvrtcGetCUBIN", "hiprtcGetBitcode", "library"); + subst("nvrtcGetCUBINSize", "hiprtcGetBitcodeSize", "library"); + subst("nvrtcGetErrorString", "hiprtcGetErrorString", "library"); + subst("nvrtcGetLoweredName", "hiprtcGetLoweredName", "library"); + subst("nvrtcGetPTX", "hiprtcGetCode", "library"); + subst("nvrtcGetPTXSize", "hiprtcGetCodeSize", "library"); + subst("nvrtcGetProgramLog", "hiprtcGetProgramLog", "library"); + subst("nvrtcGetProgramLogSize", "hiprtcGetProgramLogSize", "library"); + subst("nvrtcVersion", "hiprtcVersion", "library"); + subst("curand", "hiprand", "device_library"); + subst("curand_discrete", "hiprand_discrete", "device_library"); + subst("curand_discrete4", "hiprand_discrete4", "device_library"); + subst("curand_init", "hiprand_init", "device_library"); + subst("curand_log_normal", "hiprand_log_normal", "device_library"); + subst("curand_log_normal2", "hiprand_log_normal2", "device_library"); + subst("curand_log_normal2_double", "hiprand_log_normal2_double", "device_library"); + subst("curand_log_normal4", "hiprand_log_normal4", "device_library"); + subst("curand_log_normal4_double", "hiprand_log_normal4_double", "device_library"); + subst("curand_log_normal_double", "hiprand_log_normal_double", "device_library"); + subst("curand_normal", "hiprand_normal", "device_library"); + subst("curand_normal2", "hiprand_normal2", "device_library"); + subst("curand_normal2_double", 
"hiprand_normal2_double", "device_library"); + subst("curand_normal4", "hiprand_normal4", "device_library"); + subst("curand_normal4_double", "hiprand_normal4_double", "device_library"); + subst("curand_normal_double", "hiprand_normal_double", "device_library"); + subst("curand_poisson", "hiprand_poisson", "device_library"); + subst("curand_poisson4", "hiprand_poisson4", "device_library"); + subst("curand_uniform", "hiprand_uniform", "device_library"); + subst("curand_uniform2_double", "hiprand_uniform2_double", "device_library"); + subst("curand_uniform4", "hiprand_uniform4", "device_library"); + subst("curand_uniform4_double", "hiprand_uniform4_double", "device_library"); + subst("curand_uniform_double", "hiprand_uniform_double", "device_library"); + subst("__half", "__half", "device_type"); + subst("__half2", "__half2", "device_type"); + subst("__half2_raw", "__half2_raw", "device_type"); + subst("__half_raw", "__half_raw", "device_type"); + subst("__nv_bfloat16", "hip_bfloat16", "device_type"); + subst("caffe2\/core\/common_cudnn.h", "caffe2\/core\/hip\/common_miopen.h", "include"); + subst("caffe2\/operators\/spatial_batch_norm_op.h", "caffe2\/operators\/hip\/spatial_batch_norm_op_miopen.hip", "include"); + subst("channel_descriptor.h", "hip\/channel_descriptor.h", "include"); + subst("cooperative_groups.h", "hip\/hip_cooperative_groups.h", "include"); + subst("cublasLt.h", "hipblaslt.h", "include"); + subst("cublas_api.h", "hipblas.h", "include"); + subst("cuda_fp16.h", "hip\/hip_fp16.h", "include"); + subst("cuda_profiler_api.h", "hip\/hip_runtime_api.h", "include"); + subst("cuda_runtime_api.h", "hip\/hip_runtime_api.h", "include"); + subst("cuda_texture_types.h", "hip\/hip_texture_types.h", "include"); + subst("cufftXt.h", "hipfft\/hipfftXt.h", "include"); + subst("curand_discrete.h", "hiprand\/hiprand_kernel.h", "include"); + subst("curand_discrete2.h", "hiprand\/hiprand_kernel.h", "include"); + subst("curand_globals.h", "hiprand\/hiprand_kernel.h", "include"); + subst("curand_kernel.h", "hiprand\/hiprand_kernel.h", "include"); + subst("curand_lognormal.h", "hiprand\/hiprand_kernel.h", "include"); + subst("curand_mrg32k3a.h", "hiprand\/hiprand_kernel.h", "include"); + subst("curand_mtgp32.h", "hiprand\/hiprand_kernel.h", "include"); + subst("curand_mtgp32_host.h", "hiprand\/hiprand_mtgp32_host.h", "include"); + subst("curand_mtgp32_kernel.h", "hiprand\/hiprand_kernel.h", "include"); + subst("curand_mtgp32dc_p_11213.h", "rocrand_mtgp32_11213.h", "include"); + subst("curand_normal.h", "hiprand\/hiprand_kernel.h", "include"); + subst("curand_normal_static.h", "hiprand\/hiprand_kernel.h", "include"); + subst("curand_philox4x32_x.h", "hiprand\/hiprand_kernel.h", "include"); + subst("curand_poisson.h", "hiprand\/hiprand_kernel.h", "include"); + subst("curand_precalc.h", "hiprand\/hiprand_kernel.h", "include"); + subst("curand_uniform.h", "hiprand\/hiprand_kernel.h", "include"); + subst("device_functions.h", "hip\/device_functions.h", "include"); + subst("device_launch_parameters.h", "", "include"); + subst("driver_types.h", "hip\/driver_types.h", "include"); + subst("library_types.h", "hip\/library_types.h", "include"); + subst("math_constants.h", "hip\/hip_math_constants.h", "include"); + subst("texture_fetch_functions.h", "", "include"); + subst("vector_types.h", "hip\/hip_vector_types.h", "include"); + subst("cuComplex.h", "hip\/hip_complex.h", "include_cuda_main_header"); + subst("cub\/cub.cuh", "hipcub\/hipcub.hpp", "include_cuda_main_header"); + subst("cublas.h", "hipblas.h", 
"include_cuda_main_header"); + subst("cuda.h", "hip\/hip_runtime.h", "include_cuda_main_header"); + subst("cuda_runtime.h", "hip\/hip_runtime.h", "include_cuda_main_header"); + subst("cudnn.h", "hipDNN.h", "include_cuda_main_header"); + subst("cufft.h", "hipfft\/hipfft.h", "include_cuda_main_header"); + subst("curand.h", "hiprand\/hiprand.h", "include_cuda_main_header"); + subst("cusolverDn.h", "hipsolver.h", "include_cuda_main_header"); + subst("cusolverMg.h", "hipsolver.h", "include_cuda_main_header"); + subst("cusolverRf.h", "hipsolver.h", "include_cuda_main_header"); + subst("cusolverSp.h", "hipsolver.h", "include_cuda_main_header"); + subst("cusolverSp_LOWLEVEL_PREVIEW.h", "hipsolver.h", "include_cuda_main_header"); + subst("cusolver_common.h", "hipsolver.h", "include_cuda_main_header"); + subst("cusparse.h", "hipsparse.h", "include_cuda_main_header"); + subst("nvrtc.h", "hiprtc.h", "include_cuda_main_header"); + subst("cublas_v2.h", "hipblas.h", "include_cuda_main_header_v2"); + subst("cusparse_v2.h", "hipsparse.h", "include_cuda_main_header_v2"); + subst("CUDAContext", "HIPContext", "type"); + subst("CUDA_ARRAY3D_DESCRIPTOR", "HIP_ARRAY3D_DESCRIPTOR", "type"); + subst("CUDA_ARRAY3D_DESCRIPTOR_st", "HIP_ARRAY3D_DESCRIPTOR", "type"); + subst("CUDA_ARRAY3D_DESCRIPTOR_v2", "HIP_ARRAY3D_DESCRIPTOR", "type"); + subst("CUDA_ARRAY_DESCRIPTOR", "HIP_ARRAY_DESCRIPTOR", "type"); + subst("CUDA_ARRAY_DESCRIPTOR_st", "HIP_ARRAY_DESCRIPTOR", "type"); + subst("CUDA_ARRAY_DESCRIPTOR_v1", "HIP_ARRAY_DESCRIPTOR", "type"); + subst("CUDA_ARRAY_DESCRIPTOR_v1_st", "HIP_ARRAY_DESCRIPTOR", "type"); + subst("CUDA_ARRAY_DESCRIPTOR_v2", "HIP_ARRAY_DESCRIPTOR", "type"); + subst("CUDA_CHILD_GRAPH_NODE_PARAMS", "hipChildGraphNodeParams", "type"); + subst("CUDA_CHILD_GRAPH_NODE_PARAMS_st", "hipChildGraphNodeParams", "type"); + subst("CUDA_EVENT_RECORD_NODE_PARAMS", "hipEventRecordNodeParams", "type"); + subst("CUDA_EVENT_RECORD_NODE_PARAMS_st", "hipEventRecordNodeParams", "type"); + subst("CUDA_EVENT_WAIT_NODE_PARAMS", "hipEventWaitNodeParams", "type"); + subst("CUDA_EVENT_WAIT_NODE_PARAMS_st", "hipEventWaitNodeParams", "type"); + subst("CUDA_EXTERNAL_MEMORY_BUFFER_DESC", "hipExternalMemoryBufferDesc", "type"); + subst("CUDA_EXTERNAL_MEMORY_BUFFER_DESC_st", "hipExternalMemoryBufferDesc_st", "type"); + subst("CUDA_EXTERNAL_MEMORY_BUFFER_DESC_v1", "hipExternalMemoryBufferDesc", "type"); + subst("CUDA_EXTERNAL_MEMORY_HANDLE_DESC", "hipExternalMemoryHandleDesc", "type"); + subst("CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st", "hipExternalMemoryHandleDesc_st", "type"); + subst("CUDA_EXTERNAL_MEMORY_HANDLE_DESC_v1", "hipExternalMemoryHandleDesc", "type"); + subst("CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC", "hipExternalSemaphoreHandleDesc", "type"); + subst("CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_st", "hipExternalSemaphoreHandleDesc_st", "type"); + subst("CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_v1", "hipExternalSemaphoreHandleDesc", "type"); + subst("CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS", "hipExternalSemaphoreSignalParams", "type"); + subst("CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st", "hipExternalSemaphoreSignalParams_st", "type"); + subst("CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_v1", "hipExternalSemaphoreSignalParams", "type"); + subst("CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS", "hipExternalSemaphoreWaitParams", "type"); + subst("CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st", "hipExternalSemaphoreWaitParams_st", "type"); + subst("CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_v1", "hipExternalSemaphoreWaitParams", "type"); + 
subst("CUDA_EXT_SEM_SIGNAL_NODE_PARAMS", "hipExternalSemaphoreSignalNodeParams", "type"); + subst("CUDA_EXT_SEM_SIGNAL_NODE_PARAMS_st", "hipExternalSemaphoreSignalNodeParams", "type"); + subst("CUDA_EXT_SEM_SIGNAL_NODE_PARAMS_v1", "hipExternalSemaphoreSignalNodeParams", "type"); + subst("CUDA_EXT_SEM_SIGNAL_NODE_PARAMS_v2", "hipExternalSemaphoreSignalNodeParams", "type"); + subst("CUDA_EXT_SEM_SIGNAL_NODE_PARAMS_v2_st", "hipExternalSemaphoreSignalNodeParams", "type"); + subst("CUDA_EXT_SEM_WAIT_NODE_PARAMS", "hipExternalSemaphoreWaitNodeParams", "type"); + subst("CUDA_EXT_SEM_WAIT_NODE_PARAMS_st", "hipExternalSemaphoreWaitNodeParams", "type"); + subst("CUDA_EXT_SEM_WAIT_NODE_PARAMS_v1", "hipExternalSemaphoreWaitNodeParams", "type"); + subst("CUDA_EXT_SEM_WAIT_NODE_PARAMS_v2", "hipExternalSemaphoreWaitNodeParams", "type"); + subst("CUDA_EXT_SEM_WAIT_NODE_PARAMS_v2_st", "hipExternalSemaphoreWaitNodeParams", "type"); + subst("CUDA_GRAPH_INSTANTIATE_PARAMS", "hipGraphInstantiateParams", "type"); + subst("CUDA_GRAPH_INSTANTIATE_PARAMS_st", "hipGraphInstantiateParams", "type"); + subst("CUDA_HOST_NODE_PARAMS", "hipHostNodeParams", "type"); + subst("CUDA_HOST_NODE_PARAMS_st", "hipHostNodeParams", "type"); + subst("CUDA_HOST_NODE_PARAMS_v1", "hipHostNodeParams", "type"); + subst("CUDA_KERNEL_NODE_PARAMS", "hipKernelNodeParams", "type"); + subst("CUDA_KERNEL_NODE_PARAMS_st", "hipKernelNodeParams", "type"); + subst("CUDA_KERNEL_NODE_PARAMS_v1", "hipKernelNodeParams", "type"); + subst("CUDA_LAUNCH_PARAMS", "hipFunctionLaunchParams", "type"); + subst("CUDA_LAUNCH_PARAMS_st", "hipFunctionLaunchParams_t", "type"); + subst("CUDA_LAUNCH_PARAMS_v1", "hipFunctionLaunchParams", "type"); + subst("CUDA_MEMCPY2D", "hip_Memcpy2D", "type"); + subst("CUDA_MEMCPY2D_st", "hip_Memcpy2D", "type"); + subst("CUDA_MEMCPY2D_v1", "hip_Memcpy2D", "type"); + subst("CUDA_MEMCPY2D_v1_st", "hip_Memcpy2D", "type"); + subst("CUDA_MEMCPY2D_v2", "hip_Memcpy2D", "type"); + subst("CUDA_MEMCPY3D", "HIP_MEMCPY3D", "type"); + subst("CUDA_MEMCPY3D_st", "HIP_MEMCPY3D", "type"); + subst("CUDA_MEMCPY3D_v1", "HIP_MEMCPY3D", "type"); + subst("CUDA_MEMCPY3D_v1_st", "HIP_MEMCPY3D", "type"); + subst("CUDA_MEMCPY3D_v2", "HIP_MEMCPY3D", "type"); + subst("CUDA_MEMCPY_NODE_PARAMS", "hipMemcpyNodeParams", "type"); + subst("CUDA_MEMCPY_NODE_PARAMS_st", "hipMemcpyNodeParams", "type"); + subst("CUDA_MEMSET_NODE_PARAMS", "HIP_MEMSET_NODE_PARAMS", "type"); + subst("CUDA_MEMSET_NODE_PARAMS_st", "HIP_MEMSET_NODE_PARAMS", "type"); + subst("CUDA_MEMSET_NODE_PARAMS_v1", "HIP_MEMSET_NODE_PARAMS", "type"); + subst("CUDA_MEM_ALLOC_NODE_PARAMS", "hipMemAllocNodeParams", "type"); + subst("CUDA_MEM_ALLOC_NODE_PARAMS_st", "hipMemAllocNodeParams", "type"); + subst("CUDA_MEM_ALLOC_NODE_PARAMS_v1", "hipMemAllocNodeParams", "type"); + subst("CUDA_MEM_ALLOC_NODE_PARAMS_v1_st", "hipMemAllocNodeParams", "type"); + subst("CUDA_MEM_FREE_NODE_PARAMS", "hipMemFreeNodeParams", "type"); + subst("CUDA_MEM_FREE_NODE_PARAMS_st", "hipMemFreeNodeParams", "type"); + subst("CUDA_RESOURCE_DESC", "HIP_RESOURCE_DESC", "type"); + subst("CUDA_RESOURCE_DESC_st", "HIP_RESOURCE_DESC_st", "type"); + subst("CUDA_RESOURCE_DESC_v1", "HIP_RESOURCE_DESC", "type"); + subst("CUDA_RESOURCE_VIEW_DESC", "HIP_RESOURCE_VIEW_DESC", "type"); + subst("CUDA_RESOURCE_VIEW_DESC_st", "HIP_RESOURCE_VIEW_DESC_st", "type"); + subst("CUDA_RESOURCE_VIEW_DESC_v1", "HIP_RESOURCE_VIEW_DESC", "type"); + subst("CUDA_TEXTURE_DESC", "HIP_TEXTURE_DESC", "type"); + subst("CUDA_TEXTURE_DESC_st", "HIP_TEXTURE_DESC_st", "type"); + 
subst("CUDA_TEXTURE_DESC_v1", "HIP_TEXTURE_DESC", "type"); + subst("CUGLDeviceList", "hipGLDeviceList", "type"); + subst("CUGLDeviceList_enum", "hipGLDeviceList", "type"); + subst("CUGPUDirectRDMAWritesOrdering", "hipGPUDirectRDMAWritesOrdering", "type"); + subst("CUGPUDirectRDMAWritesOrdering_enum", "hipGPUDirectRDMAWritesOrdering", "type"); + subst("CUaccessPolicyWindow", "hipAccessPolicyWindow", "type"); + subst("CUaccessPolicyWindow_st", "hipAccessPolicyWindow", "type"); + subst("CUaccessProperty", "hipAccessProperty", "type"); + subst("CUaccessProperty_enum", "hipAccessProperty", "type"); + subst("CUaddress_mode", "HIPaddress_mode", "type"); + subst("CUaddress_mode_enum", "HIPaddress_mode_enum", "type"); + subst("CUarray", "hipArray_t", "type"); + subst("CUarrayMapInfo", "hipArrayMapInfo", "type"); + subst("CUarrayMapInfo_st", "hipArrayMapInfo", "type"); + subst("CUarrayMapInfo_v1", "hipArrayMapInfo", "type"); + subst("CUarraySparseSubresourceType", "hipArraySparseSubresourceType", "type"); + subst("CUarraySparseSubresourceType_enum", "hipArraySparseSubresourceType", "type"); + subst("CUarray_format", "hipArray_Format", "type"); + subst("CUarray_format_enum", "hipArray_Format", "type"); + subst("CUarray_st", "hipArray", "type"); + subst("CUcomputemode", "hipComputeMode", "type"); + subst("CUcomputemode_enum", "hipComputeMode", "type"); + subst("CUcontext", "hipCtx_t", "type"); + subst("CUctx_st", "ihipCtx_t", "type"); + subst("CUdevice", "hipDevice_t", "type"); + subst("CUdevice_P2PAttribute", "hipDeviceP2PAttr", "type"); + subst("CUdevice_P2PAttribute_enum", "hipDeviceP2PAttr", "type"); + subst("CUdevice_attribute", "hipDeviceAttribute_t", "type"); + subst("CUdevice_attribute_enum", "hipDeviceAttribute_t", "type"); + subst("CUdevice_v1", "hipDevice_t", "type"); + subst("CUdeviceptr", "hipDeviceptr_t", "type"); + subst("CUdeviceptr_v1", "hipDeviceptr_t", "type"); + subst("CUdeviceptr_v2", "hipDeviceptr_t", "type"); + subst("CUevent", "hipEvent_t", "type"); + subst("CUevent_st", "ihipEvent_t", "type"); + subst("CUexternalMemory", "hipExternalMemory_t", "type"); + subst("CUexternalMemoryHandleType", "hipExternalMemoryHandleType", "type"); + subst("CUexternalMemoryHandleType_enum", "hipExternalMemoryHandleType_enum", "type"); + subst("CUexternalSemaphore", "hipExternalSemaphore_t", "type"); + subst("CUexternalSemaphoreHandleType", "hipExternalSemaphoreHandleType", "type"); + subst("CUexternalSemaphoreHandleType_enum", "hipExternalSemaphoreHandleType_enum", "type"); + subst("CUfilter_mode", "HIPfilter_mode", "type"); + subst("CUfilter_mode_enum", "HIPfilter_mode_enum", "type"); + subst("CUflushGPUDirectRDMAWritesOptions", "hipFlushGPUDirectRDMAWritesOptions", "type"); + subst("CUflushGPUDirectRDMAWritesOptions_enum", "hipFlushGPUDirectRDMAWritesOptions", "type"); + subst("CUfunc_cache", "hipFuncCache_t", "type"); + subst("CUfunc_cache_enum", "hipFuncCache_t", "type"); + subst("CUfunc_st", "ihipModuleSymbol_t", "type"); + subst("CUfunction", "hipFunction_t", "type"); + subst("CUfunction_attribute", "hipFunction_attribute", "type"); + subst("CUfunction_attribute_enum", "hipFunction_attribute", "type"); + subst("CUgraph", "hipGraph_t", "type"); + subst("CUgraphDebugDot_flags", "hipGraphDebugDotFlags", "type"); + subst("CUgraphDebugDot_flags_enum", "hipGraphDebugDotFlags", "type"); + subst("CUgraphExec", "hipGraphExec_t", "type"); + subst("CUgraphExecUpdateResult", "hipGraphExecUpdateResult", "type"); + subst("CUgraphExecUpdateResult_enum", "hipGraphExecUpdateResult", "type"); + 
subst("CUgraphExec_st", "hipGraphExec", "type"); + subst("CUgraphInstantiateResult", "hipGraphInstantiateResult", "type"); + subst("CUgraphInstantiateResult_enum", "hipGraphInstantiateResult", "type"); + subst("CUgraphInstantiate_flags", "hipGraphInstantiateFlags", "type"); + subst("CUgraphInstantiate_flags_enum", "hipGraphInstantiateFlags", "type"); + subst("CUgraphMem_attribute", "hipGraphMemAttributeType", "type"); + subst("CUgraphMem_attribute_enum", "hipGraphMemAttributeType", "type"); + subst("CUgraphNode", "hipGraphNode_t", "type"); + subst("CUgraphNodeParams", "hipGraphNodeParams", "type"); + subst("CUgraphNodeParams_st", "hipGraphNodeParams", "type"); + subst("CUgraphNodeType", "hipGraphNodeType", "type"); + subst("CUgraphNodeType_enum", "hipGraphNodeType", "type"); + subst("CUgraphNode_st", "hipGraphNode", "type"); + subst("CUgraph_st", "ihipGraph", "type"); + subst("CUgraphicsRegisterFlags", "hipGraphicsRegisterFlags", "type"); + subst("CUgraphicsRegisterFlags_enum", "hipGraphicsRegisterFlags", "type"); + subst("CUgraphicsResource", "hipGraphicsResource_t", "type"); + subst("CUgraphicsResource_st", "hipGraphicsResource", "type"); + subst("CUhostFn", "hipHostFn_t", "type"); + subst("CUipcEventHandle", "hipIpcEventHandle_t", "type"); + subst("CUipcEventHandle_st", "hipIpcEventHandle_st", "type"); + subst("CUipcEventHandle_v1", "hipIpcEventHandle_t", "type"); + subst("CUipcMemHandle", "hipIpcMemHandle_t", "type"); + subst("CUipcMemHandle_st", "hipIpcMemHandle_st", "type"); + subst("CUipcMemHandle_v1", "hipIpcMemHandle_t", "type"); + subst("CUjitInputType", "hiprtcJITInputType", "type"); + subst("CUjitInputType_enum", "hiprtcJITInputType", "type"); + subst("CUjit_option", "hipJitOption", "type"); + subst("CUjit_option_enum", "hipJitOption", "type"); + subst("CUkernelNodeAttrID", "hipKernelNodeAttrID", "type"); + subst("CUkernelNodeAttrID_enum", "hipKernelNodeAttrID", "type"); + subst("CUkernelNodeAttrValue", "hipKernelNodeAttrValue", "type"); + subst("CUkernelNodeAttrValue_union", "hipKernelNodeAttrValue", "type"); + subst("CUkernelNodeAttrValue_v1", "hipKernelNodeAttrValue", "type"); + subst("CUlimit", "hipLimit_t", "type"); + subst("CUlimit_enum", "hipLimit_t", "type"); + subst("CUlinkState", "hiprtcLinkState", "type"); + subst("CUlinkState_st", "ihiprtcLinkState", "type"); + subst("CUmemAccessDesc", "hipMemAccessDesc", "type"); + subst("CUmemAccessDesc_st", "hipMemAccessDesc", "type"); + subst("CUmemAccessDesc_v1", "hipMemAccessDesc", "type"); + subst("CUmemAccess_flags", "hipMemAccessFlags", "type"); + subst("CUmemAccess_flags_enum", "hipMemAccessFlags", "type"); + subst("CUmemAllocationGranularity_flags", "hipMemAllocationGranularity_flags", "type"); + subst("CUmemAllocationGranularity_flags_enum", "hipMemAllocationGranularity_flags", "type"); + subst("CUmemAllocationHandleType", "hipMemAllocationHandleType", "type"); + subst("CUmemAllocationHandleType_enum", "hipMemAllocationHandleType", "type"); + subst("CUmemAllocationProp", "hipMemAllocationProp", "type"); + subst("CUmemAllocationProp_st", "hipMemAllocationProp", "type"); + subst("CUmemAllocationProp_v1", "hipMemAllocationProp", "type"); + subst("CUmemAllocationType", "hipMemAllocationType", "type"); + subst("CUmemAllocationType_enum", "hipMemAllocationType", "type"); + subst("CUmemGenericAllocationHandle", "hipMemGenericAllocationHandle_t", "type"); + subst("CUmemGenericAllocationHandle_v1", "hipMemGenericAllocationHandle_t", "type"); + subst("CUmemHandleType", "hipMemHandleType", "type"); + subst("CUmemHandleType_enum", 
"hipMemHandleType", "type"); + subst("CUmemLocation", "hipMemLocation", "type"); + subst("CUmemLocationType", "hipMemLocationType", "type"); + subst("CUmemLocationType_enum", "hipMemLocationType", "type"); + subst("CUmemLocation_st", "hipMemLocation", "type"); + subst("CUmemLocation_v1", "hipMemLocation", "type"); + subst("CUmemOperationType", "hipMemOperationType", "type"); + subst("CUmemOperationType_enum", "hipMemOperationType", "type"); + subst("CUmemPoolHandle_st", "ihipMemPoolHandle_t", "type"); + subst("CUmemPoolProps", "hipMemPoolProps", "type"); + subst("CUmemPoolProps_st", "hipMemPoolProps", "type"); + subst("CUmemPoolProps_v1", "hipMemPoolProps", "type"); + subst("CUmemPoolPtrExportData", "hipMemPoolPtrExportData", "type"); + subst("CUmemPoolPtrExportData_st", "hipMemPoolPtrExportData", "type"); + subst("CUmemPoolPtrExportData_v1", "hipMemPoolPtrExportData", "type"); + subst("CUmemPool_attribute", "hipMemPoolAttr", "type"); + subst("CUmemPool_attribute_enum", "hipMemPoolAttr", "type"); + subst("CUmem_advise", "hipMemoryAdvise", "type"); + subst("CUmem_advise_enum", "hipMemoryAdvise", "type"); + subst("CUmem_range_attribute", "hipMemRangeAttribute", "type"); + subst("CUmem_range_attribute_enum", "hipMemRangeAttribute", "type"); + subst("CUmemoryPool", "hipMemPool_t", "type"); + subst("CUmemorytype", "hipMemoryType", "type"); + subst("CUmemorytype_enum", "hipMemoryType", "type"); + subst("CUmipmappedArray", "hipMipmappedArray_t", "type"); + subst("CUmipmappedArray_st", "hipMipmappedArray", "type"); + subst("CUmod_st", "ihipModule_t", "type"); + subst("CUmodule", "hipModule_t", "type"); + subst("CUoccupancyB2DSize", "void*", "type"); + subst("CUpointer_attribute", "hipPointer_attribute", "type"); + subst("CUpointer_attribute_enum", "hipPointer_attribute", "type"); + subst("CUresourceViewFormat", "HIPresourceViewFormat", "type"); + subst("CUresourceViewFormat_enum", "HIPresourceViewFormat_enum", "type"); + subst("CUresourcetype", "HIPresourcetype", "type"); + subst("CUresourcetype_enum", "HIPresourcetype_enum", "type"); + subst("CUresult", "hipError_t", "type"); + subst("CUsharedconfig", "hipSharedMemConfig", "type"); + subst("CUsharedconfig_enum", "hipSharedMemConfig", "type"); + subst("CUstream", "hipStream_t", "type"); + subst("CUstreamCallback", "hipStreamCallback_t", "type"); + subst("CUstreamCaptureMode", "hipStreamCaptureMode", "type"); + subst("CUstreamCaptureMode_enum", "hipStreamCaptureMode", "type"); + subst("CUstreamCaptureStatus", "hipStreamCaptureStatus", "type"); + subst("CUstreamCaptureStatus_enum", "hipStreamCaptureStatus", "type"); + subst("CUstreamUpdateCaptureDependencies_flags", "hipStreamUpdateCaptureDependenciesFlags", "type"); + subst("CUstreamUpdateCaptureDependencies_flags_enum", "hipStreamUpdateCaptureDependenciesFlags", "type"); + subst("CUstream_st", "ihipStream_t", "type"); + subst("CUsurfObject", "hipSurfaceObject_t", "type"); + subst("CUsurfObject_v1", "hipSurfaceObject_t", "type"); + subst("CUtexObject", "hipTextureObject_t", "type"); + subst("CUtexObject_v1", "hipTextureObject_t", "type"); + subst("CUtexref", "hipTexRef", "type"); + subst("CUtexref_st", "textureReference", "type"); + subst("CUuserObject", "hipUserObject_t", "type"); + subst("CUuserObjectRetain_flags", "hipUserObjectRetainFlags", "type"); + subst("CUuserObjectRetain_flags_enum", "hipUserObjectRetainFlags", "type"); + subst("CUuserObject_flags", "hipUserObjectFlags", "type"); + subst("CUuserObject_flags_enum", "hipUserObjectFlags", "type"); + subst("CUuserObject_st", "hipUserObject", 
"type"); + subst("CUuuid", "hipUUID", "type"); + subst("CUuuid_st", "hipUUID_t", "type"); + subst("GLenum", "GLenum", "type"); + subst("GLuint", "GLuint", "type"); + subst("bsric02Info", "bsric02Info", "type"); + subst("bsric02Info_t", "bsric02Info_t", "type"); + subst("bsrilu02Info", "bsrilu02Info", "type"); + subst("bsrilu02Info_t", "bsrilu02Info_t", "type"); + subst("bsrsm2Info", "bsrsm2Info", "type"); + subst("bsrsm2Info_t", "bsrsm2Info_t", "type"); + subst("bsrsv2Info", "bsrsv2Info", "type"); + subst("bsrsv2Info_t", "bsrsv2Info_t", "type"); + subst("csrgemm2Info", "csrgemm2Info", "type"); + subst("csrgemm2Info_t", "csrgemm2Info_t", "type"); + subst("csric02Info", "csric02Info", "type"); + subst("csric02Info_t", "csric02Info_t", "type"); + subst("csrilu02Info", "csrilu02Info", "type"); + subst("csrilu02Info_t", "csrilu02Info_t", "type"); + subst("csrsm2Info_t", "csrsm2Info_t", "type"); + subst("csrsv2Info_t", "csrsv2Info_t", "type"); + subst("csru2csrInfo", "csru2csrInfo", "type"); + subst("csru2csrInfo_t", "csru2csrInfo_t", "type"); + subst("cuComplex", "hipComplex", "type"); + subst("cuDoubleComplex", "hipDoubleComplex", "type"); + subst("cuFloatComplex", "hipFloatComplex", "type"); + subst("cublasAtomicsMode_t", "hipblasAtomicsMode_t", "type"); + subst("cublasComputeType_t", "hipblasComputeType_t", "type"); + subst("cublasDataType_t", "hipDataType", "type"); + subst("cublasDiagType_t", "hipblasDiagType_t", "type"); + subst("cublasFillMode_t", "hipblasFillMode_t", "type"); + subst("cublasGemmAlgo_t", "hipblasGemmAlgo_t", "type"); + subst("cublasHandle_t", "hipblasHandle_t", "type"); + subst("cublasLtEpilogue_t", "hipblasLtEpilogue_t", "type"); + subst("cublasLtHandle_t", "hipblasLtHandle_t", "type"); + subst("cublasLtMatmulAlgo_t", "hipblasLtMatmulAlgo_t", "type"); + subst("cublasLtMatmulDescAttributes_t", "hipblasLtMatmulDescAttributes_t", "type"); + subst("cublasLtMatmulDescOpaque_t", "hipblasLtMatmulDescOpaque_t", "type"); + subst("cublasLtMatmulDesc_t", "hipblasLtMatmulDesc_t", "type"); + subst("cublasLtMatmulHeuristicResult_t", "hipblasLtMatmulHeuristicResult_t", "type"); + subst("cublasLtMatmulPreferenceAttributes_t", "hipblasLtMatmulPreferenceAttributes_t", "type"); + subst("cublasLtMatmulPreferenceOpaque_t", "hipblasLtMatmulPreferenceOpaque_t", "type"); + subst("cublasLtMatmulPreference_t", "hipblasLtMatmulPreference_t", "type"); + subst("cublasLtMatrixLayoutAttribute_t", "hipblasLtMatrixLayoutAttribute_t", "type"); + subst("cublasLtMatrixLayoutOpaque_t", "hipblasLtMatrixLayoutOpaque_t", "type"); + subst("cublasLtMatrixLayoutStruct", "hipblasLtMatrixLayoutOpaque_t", "type"); + subst("cublasLtMatrixLayout_t", "hipblasLtMatrixLayout_t", "type"); + subst("cublasLtMatrixTransformDescAttributes_t", "hipblasLtMatrixTransformDescAttributes_t", "type"); + subst("cublasLtMatrixTransformDescOpaque_t", "hipblasLtMatrixTransformDescOpaque_t", "type"); + subst("cublasLtMatrixTransformDesc_t", "hipblasLtMatrixTransformDesc_t", "type"); + subst("cublasLtPointerMode_t", "hipblasLtPointerMode_t", "type"); + subst("cublasMath_t", "hipblasMath_t", "type"); + subst("cublasOperation_t", "hipblasOperation_t", "type"); + subst("cublasPointerMode_t", "hipblasPointerMode_t", "type"); + subst("cublasSideMode_t", "hipblasSideMode_t", "type"); + subst("cublasStatus", "hipblasStatus_t", "type"); + subst("cublasStatus_t", "hipblasStatus_t", "type"); + subst("cudaAccessPolicyWindow", "hipAccessPolicyWindow", "type"); + subst("cudaAccessProperty", "hipAccessProperty", "type"); + subst("cudaArray", 
"hipArray", "type"); + subst("cudaArray_const_t", "hipArray_const_t", "type"); + subst("cudaArray_t", "hipArray_t", "type"); + subst("cudaChannelFormatDesc", "hipChannelFormatDesc", "type"); + subst("cudaChannelFormatKind", "hipChannelFormatKind", "type"); + subst("cudaChildGraphNodeParams", "hipChildGraphNodeParams", "type"); + subst("cudaComputeMode", "hipComputeMode", "type"); + subst("cudaDataType", "hipDataType", "type"); + subst("cudaDataType_t", "hipDataType", "type"); + subst("cudaDeviceAttr", "hipDeviceAttribute_t", "type"); + subst("cudaDeviceP2PAttr", "hipDeviceP2PAttr", "type"); + subst("cudaDeviceProp", "hipDeviceProp_t", "type"); + subst("cudaError", "hipError_t", "type"); + subst("cudaError_enum", "hipError_t", "type"); + subst("cudaError_t", "hipError_t", "type"); + subst("cudaEventRecordNodeParams", "hipEventRecordNodeParams", "type"); + subst("cudaEventWaitNodeParams", "hipEventWaitNodeParams", "type"); + subst("cudaEvent_t", "hipEvent_t", "type"); + subst("cudaExtent", "hipExtent", "type"); + subst("cudaExternalMemoryBufferDesc", "hipExternalMemoryBufferDesc", "type"); + subst("cudaExternalMemoryHandleDesc", "hipExternalMemoryHandleDesc", "type"); + subst("cudaExternalMemoryHandleType", "hipExternalMemoryHandleType", "type"); + subst("cudaExternalMemory_t", "hipExternalMemory_t", "type"); + subst("cudaExternalSemaphoreHandleDesc", "hipExternalSemaphoreHandleDesc", "type"); + subst("cudaExternalSemaphoreHandleType", "hipExternalSemaphoreHandleType", "type"); + subst("cudaExternalSemaphoreSignalNodeParams", "hipExternalSemaphoreSignalNodeParams", "type"); + subst("cudaExternalSemaphoreSignalNodeParamsV2", "hipExternalSemaphoreSignalNodeParams", "type"); + subst("cudaExternalSemaphoreSignalParams", "hipExternalSemaphoreSignalParams", "type"); + subst("cudaExternalSemaphoreSignalParams_v1", "hipExternalSemaphoreSignalParams", "type"); + subst("cudaExternalSemaphoreWaitNodeParams", "hipExternalSemaphoreWaitNodeParams", "type"); + subst("cudaExternalSemaphoreWaitNodeParamsV2", "hipExternalSemaphoreWaitNodeParams", "type"); + subst("cudaExternalSemaphoreWaitParams", "hipExternalSemaphoreWaitParams", "type"); + subst("cudaExternalSemaphoreWaitParams_v1", "hipExternalSemaphoreWaitParams", "type"); + subst("cudaExternalSemaphore_t", "hipExternalSemaphore_t", "type"); + subst("cudaFlushGPUDirectRDMAWritesOptions", "hipFlushGPUDirectRDMAWritesOptions", "type"); + subst("cudaFuncAttribute", "hipFuncAttribute", "type"); + subst("cudaFuncAttributes", "hipFuncAttributes", "type"); + subst("cudaFuncCache", "hipFuncCache_t", "type"); + subst("cudaFunction_t", "hipFunction_t", "type"); + subst("cudaGLDeviceList", "hipGLDeviceList", "type"); + subst("cudaGPUDirectRDMAWritesOrdering", "hipGPUDirectRDMAWritesOrdering", "type"); + subst("cudaGraphDebugDotFlags", "hipGraphDebugDotFlags", "type"); + subst("cudaGraphExecUpdateResult", "hipGraphExecUpdateResult", "type"); + subst("cudaGraphExec_t", "hipGraphExec_t", "type"); + subst("cudaGraphInstantiateFlags", "hipGraphInstantiateFlags", "type"); + subst("cudaGraphInstantiateParams", "hipGraphInstantiateParams", "type"); + subst("cudaGraphInstantiateParams_st", "hipGraphInstantiateParams", "type"); + subst("cudaGraphInstantiateResult", "hipGraphInstantiateResult", "type"); + subst("cudaGraphMemAttributeType", "hipGraphMemAttributeType", "type"); + subst("cudaGraphNodeParams", "hipGraphNodeParams", "type"); + subst("cudaGraphNodeType", "hipGraphNodeType", "type"); + subst("cudaGraphNode_t", "hipGraphNode_t", "type"); + subst("cudaGraph_t", 
"hipGraph_t", "type"); + subst("cudaGraphicsRegisterFlags", "hipGraphicsRegisterFlags", "type"); + subst("cudaGraphicsResource", "hipGraphicsResource", "type"); + subst("cudaGraphicsResource_t", "hipGraphicsResource_t", "type"); + subst("cudaHostFn_t", "hipHostFn_t", "type"); + subst("cudaHostNodeParams", "hipHostNodeParams", "type"); + subst("cudaIpcEventHandle_st", "hipIpcEventHandle_st", "type"); + subst("cudaIpcEventHandle_t", "hipIpcEventHandle_t", "type"); + subst("cudaIpcMemHandle_st", "hipIpcMemHandle_st", "type"); + subst("cudaIpcMemHandle_t", "hipIpcMemHandle_t", "type"); + subst("cudaKernelNodeAttrID", "hipKernelNodeAttrID", "type"); + subst("cudaKernelNodeAttrValue", "hipKernelNodeAttrValue", "type"); + subst("cudaKernelNodeParams", "hipKernelNodeParams", "type"); + subst("cudaLaunchParams", "hipLaunchParams", "type"); + subst("cudaLimit", "hipLimit_t", "type"); + subst("cudaMemAccessDesc", "hipMemAccessDesc", "type"); + subst("cudaMemAccessFlags", "hipMemAccessFlags", "type"); + subst("cudaMemAllocNodeParams", "hipMemAllocNodeParams", "type"); + subst("cudaMemAllocationHandleType", "hipMemAllocationHandleType", "type"); + subst("cudaMemAllocationType", "hipMemAllocationType", "type"); + subst("cudaMemFreeNodeParams", "hipMemFreeNodeParams", "type"); + subst("cudaMemLocation", "hipMemLocation", "type"); + subst("cudaMemLocationType", "hipMemLocationType", "type"); + subst("cudaMemPoolAttr", "hipMemPoolAttr", "type"); + subst("cudaMemPoolProps", "hipMemPoolProps", "type"); + subst("cudaMemPoolPtrExportData", "hipMemPoolPtrExportData", "type"); + subst("cudaMemPool_t", "hipMemPool_t", "type"); + subst("cudaMemRangeAttribute", "hipMemRangeAttribute", "type"); + subst("cudaMemcpy3DParms", "hipMemcpy3DParms", "type"); + subst("cudaMemcpyKind", "hipMemcpyKind", "type"); + subst("cudaMemcpyNodeParams", "hipMemcpyNodeParams", "type"); + subst("cudaMemoryAdvise", "hipMemoryAdvise", "type"); + subst("cudaMemoryType", "hipMemoryType", "type"); + subst("cudaMemsetParams", "hipMemsetParams", "type"); + subst("cudaMipmappedArray", "hipMipmappedArray", "type"); + subst("cudaMipmappedArray_const_t", "hipMipmappedArray_const_t", "type"); + subst("cudaMipmappedArray_t", "hipMipmappedArray_t", "type"); + subst("cudaPitchedPtr", "hipPitchedPtr", "type"); + subst("cudaPointerAttributes", "hipPointerAttribute_t", "type"); + subst("cudaPos", "hipPos", "type"); + subst("cudaResourceDesc", "hipResourceDesc", "type"); + subst("cudaResourceType", "hipResourceType", "type"); + subst("cudaResourceViewDesc", "hipResourceViewDesc", "type"); + subst("cudaResourceViewFormat", "hipResourceViewFormat", "type"); + subst("cudaSharedMemConfig", "hipSharedMemConfig", "type"); + subst("cudaStreamCallback_t", "hipStreamCallback_t", "type"); + subst("cudaStreamCaptureMode", "hipStreamCaptureMode", "type"); + subst("cudaStreamCaptureStatus", "hipStreamCaptureStatus", "type"); + subst("cudaStreamUpdateCaptureDependenciesFlags", "hipStreamUpdateCaptureDependenciesFlags", "type"); + subst("cudaStream_t", "hipStream_t", "type"); + subst("cudaSurfaceBoundaryMode", "hipSurfaceBoundaryMode", "type"); + subst("cudaSurfaceObject_t", "hipSurfaceObject_t", "type"); + subst("cudaTextureAddressMode", "hipTextureAddressMode", "type"); + subst("cudaTextureDesc", "hipTextureDesc", "type"); + subst("cudaTextureFilterMode", "hipTextureFilterMode", "type"); + subst("cudaTextureObject_t", "hipTextureObject_t", "type"); + subst("cudaTextureReadMode", "hipTextureReadMode", "type"); + subst("cudaUUID_t", "hipUUID", "type"); + 
subst("cudaUserObjectFlags", "hipUserObjectFlags", "type"); + subst("cudaUserObjectRetainFlags", "hipUserObjectRetainFlags", "type"); + subst("cudaUserObject_t", "hipUserObject_t", "type"); + subst("cudnnActivationDescriptor_t", "hipdnnActivationDescriptor_t", "type"); + subst("cudnnActivationMode_t", "hipdnnActivationMode_t", "type"); + subst("cudnnBatchNormMode_t", "hipdnnBatchNormMode_t", "type"); + subst("cudnnConvolutionBwdDataAlgoPerfStruct", "hipdnnConvolutionBwdDataAlgoPerf_t", "type"); + subst("cudnnConvolutionBwdDataAlgoPerf_t", "hipdnnConvolutionBwdDataAlgoPerf_t", "type"); + subst("cudnnConvolutionBwdDataAlgo_t", "hipdnnConvolutionBwdDataAlgo_t", "type"); + subst("cudnnConvolutionBwdDataPreference_t", "hipdnnConvolutionBwdDataPreference_t", "type"); + subst("cudnnConvolutionBwdFilterAlgoPerfStruct", "hipdnnConvolutionBwdFilterAlgoPerf_t", "type"); + subst("cudnnConvolutionBwdFilterAlgoPerf_t", "hipdnnConvolutionBwdFilterAlgoPerf_t", "type"); + subst("cudnnConvolutionBwdFilterAlgo_t", "hipdnnConvolutionBwdFilterAlgo_t", "type"); + subst("cudnnConvolutionBwdFilterPreference_t", "hipdnnConvolutionBwdFilterPreference_t", "type"); + subst("cudnnConvolutionDescriptor_t", "hipdnnConvolutionDescriptor_t", "type"); + subst("cudnnConvolutionFwdAlgoPerfStruct", "hipdnnConvolutionFwdAlgoPerf_t", "type"); + subst("cudnnConvolutionFwdAlgoPerf_t", "hipdnnConvolutionFwdAlgoPerf_t", "type"); + subst("cudnnConvolutionFwdAlgo_t", "hipdnnConvolutionFwdAlgo_t", "type"); + subst("cudnnConvolutionFwdPreference_t", "hipdnnConvolutionFwdPreference_t", "type"); + subst("cudnnConvolutionMode_t", "hipdnnConvolutionMode_t", "type"); + subst("cudnnDataType_t", "hipdnnDataType_t", "type"); + subst("cudnnDirectionMode_t", "hipdnnDirectionMode_t", "type"); + subst("cudnnDropoutDescriptor_t", "hipdnnDropoutDescriptor_t", "type"); + subst("cudnnFilterDescriptor_t", "hipdnnFilterDescriptor_t", "type"); + subst("cudnnHandle_t", "hipdnnHandle_t", "type"); + subst("cudnnIndicesType_t", "hipdnnIndicesType_t", "type"); + subst("cudnnLRNDescriptor_t", "hipdnnLRNDescriptor_t", "type"); + subst("cudnnLRNMode_t", "hipdnnLRNMode_t", "type"); + subst("cudnnMathType_t", "hipdnnMathType_t", "type"); + subst("cudnnNanPropagation_t", "hipdnnNanPropagation_t", "type"); + subst("cudnnOpTensorDescriptor_t", "hipdnnOpTensorDescriptor_t", "type"); + subst("cudnnOpTensorOp_t", "hipdnnOpTensorOp_t", "type"); + subst("cudnnPersistentRNNPlan_t", "hipdnnPersistentRNNPlan_t", "type"); + subst("cudnnPoolingDescriptor_t", "hipdnnPoolingDescriptor_t", "type"); + subst("cudnnPoolingMode_t", "hipdnnPoolingMode_t", "type"); + subst("cudnnRNNAlgo_t", "hipdnnRNNAlgo_t", "type"); + subst("cudnnRNNBiasMode_t", "hipdnnRNNBiasMode_t", "type"); + subst("cudnnRNNDescriptor_t", "hipdnnRNNDescriptor_t", "type"); + subst("cudnnRNNInputMode_t", "hipdnnRNNInputMode_t", "type"); + subst("cudnnRNNMode_t", "hipdnnRNNMode_t", "type"); + subst("cudnnReduceTensorDescriptor_t", "hipdnnReduceTensorDescriptor_t", "type"); + subst("cudnnReduceTensorIndices_t", "hipdnnReduceTensorIndices_t", "type"); + subst("cudnnReduceTensorOp_t", "hipdnnReduceTensorOp_t", "type"); + subst("cudnnSoftmaxAlgorithm_t", "hipdnnSoftmaxAlgorithm_t", "type"); + subst("cudnnSoftmaxMode_t", "hipdnnSoftmaxMode_t", "type"); + subst("cudnnStatus_t", "hipdnnStatus_t", "type"); + subst("cudnnTensorDescriptor_t", "hipdnnTensorDescriptor_t", "type"); + subst("cudnnTensorFormat_t", "hipdnnTensorFormat_t", "type"); + subst("cufftComplex", "hipfftComplex", "type"); + subst("cufftDoubleComplex", 
"hipfftDoubleComplex", "type"); + subst("cufftDoubleReal", "hipfftDoubleReal", "type"); + subst("cufftHandle", "hipfftHandle", "type"); + subst("cufftReal", "hipfftReal", "type"); + subst("cufftResult", "hipfftResult", "type"); + subst("cufftResult_t", "hipfftResult_t", "type"); + subst("cufftType", "hipfftType", "type"); + subst("cufftType_t", "hipfftType_t", "type"); + subst("cufftXtCallbackType", "hipfftXtCallbackType", "type"); + subst("cufftXtCallbackType_t", "hipfftXtCallbackType_t", "type"); + subst("curandDirectionVectorSet", "hiprandDirectionVectorSet_t", "type"); + subst("curandDirectionVectorSet_t", "hiprandDirectionVectorSet_t", "type"); + subst("curandDirectionVectors32_t", "hiprandDirectionVectors32_t", "type"); + subst("curandDiscreteDistribution_st", "hiprandDiscreteDistribution_st", "type"); + subst("curandDiscreteDistribution_t", "hiprandDiscreteDistribution_t", "type"); + subst("curandGenerator_st", "hiprandGenerator_st", "type"); + subst("curandGenerator_t", "hiprandGenerator_t", "type"); + subst("curandRngType", "hiprandRngType_t", "type"); + subst("curandRngType_t", "hiprandRngType_t", "type"); + subst("curandState", "hiprandState", "type"); + subst("curandStateMRG32k3a", "hiprandStateMRG32k3a", "type"); + subst("curandStateMRG32k3a_t", "hiprandStateMRG32k3a_t", "type"); + subst("curandStateMtgp32", "hiprandStateMtgp32", "type"); + subst("curandStateMtgp32_t", "hiprandStateMtgp32_t", "type"); + subst("curandStatePhilox4_32_10", "hiprandStatePhilox4_32_10", "type"); + subst("curandStatePhilox4_32_10_t", "hiprandStatePhilox4_32_10_t", "type"); + subst("curandStateSobol32", "hiprandStateSobol32", "type"); + subst("curandStateSobol32_t", "hiprandStateSobol32_t", "type"); + subst("curandStateXORWOW", "hiprandStateXORWOW", "type"); + subst("curandStateXORWOW_t", "hiprandStateXORWOW_t", "type"); + subst("curandState_t", "hiprandState_t", "type"); + subst("curandStatus", "hiprandStatus", "type"); + subst("curandStatus_t", "hiprandStatus_t", "type"); + subst("cusolverDnHandle_t", "hipsolverHandle_t", "type"); + subst("cusolverEigMode_t", "hipsolverEigMode_t", "type"); + subst("cusolverEigRange_t", "hipsolverEigRange_t", "type"); + subst("cusolverEigType_t", "hipsolverEigType_t", "type"); + subst("cusolverRfFactorization_t", "hipsolverRfFactorization_t", "type"); + subst("cusolverRfHandle_t", "hipsolverRfHandle_t", "type"); + subst("cusolverRfMatrixFormat_t", "hipsolverRfMatrixFormat_t", "type"); + subst("cusolverRfNumericBoostReport_t", "hipsolverRfNumericBoostReport_t", "type"); + subst("cusolverRfResetValuesFastMode_t", "hipsolverRfResetValuesFastMode_t", "type"); + subst("cusolverRfTriangularSolve_t", "hipsolverRfTriangularSolve_t", "type"); + subst("cusolverRfUnitDiagonal_t", "hipsolverRfUnitDiagonal_t", "type"); + subst("cusolverSpHandle_t", "hipsolverSpHandle_t", "type"); + subst("cusolverStatus_t", "hipsolverStatus_t", "type"); + subst("cusparseAction_t", "hipsparseAction_t", "type"); + subst("cusparseColorInfo_t", "hipsparseColorInfo_t", "type"); + subst("cusparseConstDnMatDescr_t", "hipsparseConstDnMatDescr_t", "type"); + subst("cusparseConstDnVecDescr_t", "hipsparseConstDnVecDescr_t", "type"); + subst("cusparseConstSpMatDescr_t", "hipsparseConstSpMatDescr_t", "type"); + subst("cusparseConstSpVecDescr_t", "hipsparseConstSpVecDescr_t", "type"); + subst("cusparseCsr2CscAlg_t", "hipsparseCsr2CscAlg_t", "type"); + subst("cusparseDenseToSparseAlg_t", "hipsparseDenseToSparseAlg_t", "type"); + subst("cusparseDiagType_t", "hipsparseDiagType_t", "type"); + 
subst("cusparseDirection_t", "hipsparseDirection_t", "type"); + subst("cusparseDnMatDescr_t", "hipsparseDnMatDescr_t", "type"); + subst("cusparseDnVecDescr_t", "hipsparseDnVecDescr_t", "type"); + subst("cusparseFillMode_t", "hipsparseFillMode_t", "type"); + subst("cusparseFormat_t", "hipsparseFormat_t", "type"); + subst("cusparseHandle_t", "hipsparseHandle_t", "type"); + subst("cusparseHybMat_t", "hipsparseHybMat_t", "type"); + subst("cusparseHybPartition_t", "hipsparseHybPartition_t", "type"); + subst("cusparseIndexBase_t", "hipsparseIndexBase_t", "type"); + subst("cusparseIndexType_t", "hipsparseIndexType_t", "type"); + subst("cusparseMatDescr_t", "hipsparseMatDescr_t", "type"); + subst("cusparseMatrixType_t", "hipsparseMatrixType_t", "type"); + subst("cusparseOperation_t", "hipsparseOperation_t", "type"); + subst("cusparseOrder_t", "hipsparseOrder_t", "type"); + subst("cusparsePointerMode_t", "hipsparsePointerMode_t", "type"); + subst("cusparseSDDMMAlg_t", "hipsparseSDDMMAlg_t", "type"); + subst("cusparseSolvePolicy_t", "hipsparseSolvePolicy_t", "type"); + subst("cusparseSpGEMMAlg_t", "hipsparseSpGEMMAlg_t", "type"); + subst("cusparseSpGEMMDescr", "hipsparseSpGEMMDescr", "type"); + subst("cusparseSpGEMMDescr_t", "hipsparseSpGEMMDescr_t", "type"); + subst("cusparseSpMMAlg_t", "hipsparseSpMMAlg_t", "type"); + subst("cusparseSpMVAlg_t", "hipsparseSpMVAlg_t", "type"); + subst("cusparseSpMatAttribute_t", "hipsparseSpMatAttribute_t", "type"); + subst("cusparseSpMatDescr_t", "hipsparseSpMatDescr_t", "type"); + subst("cusparseSpSMAlg_t", "hipsparseSpSMAlg_t", "type"); + subst("cusparseSpSMDescr", "hipsparseSpSMDescr", "type"); + subst("cusparseSpSMDescr_t", "hipsparseSpSMDescr_t", "type"); + subst("cusparseSpSVAlg_t", "hipsparseSpSVAlg_t", "type"); + subst("cusparseSpSVDescr", "hipsparseSpSVDescr", "type"); + subst("cusparseSpSVDescr_t", "hipsparseSpSVDescr_t", "type"); + subst("cusparseSpVecDescr_t", "hipsparseSpVecDescr_t", "type"); + subst("cusparseSparseToDenseAlg_t", "hipsparseSparseToDenseAlg_t", "type"); + subst("cusparseStatus_t", "hipsparseStatus_t", "type"); + subst("gesvdjInfo_t", "hipsolverGesvdjInfo_t", "type"); + subst("nvrtcProgram", "hiprtcProgram", "type"); + subst("nvrtcResult", "hiprtcResult", "type"); + subst("pruneInfo", "pruneInfo", "type"); + subst("pruneInfo_t", "pruneInfo_t", "type"); + subst("surfaceReference", "surfaceReference", "type"); + subst("syevjInfo_t", "hipsolverSyevjInfo_t", "type"); + subst("texture", "texture", "type"); + subst("textureReference", "textureReference", "type"); + subst("CUBLASLT_EPILOGUE_BGRADA", "HIPBLASLT_EPILOGUE_BGRADA", "numeric_literal"); + subst("CUBLASLT_EPILOGUE_BGRADB", "HIPBLASLT_EPILOGUE_BGRADB", "numeric_literal"); + subst("CUBLASLT_EPILOGUE_BIAS", "HIPBLASLT_EPILOGUE_BIAS", "numeric_literal"); + subst("CUBLASLT_EPILOGUE_DEFAULT", "HIPBLASLT_EPILOGUE_DEFAULT", "numeric_literal"); + subst("CUBLASLT_EPILOGUE_DGELU", "HIPBLASLT_EPILOGUE_DGELU", "numeric_literal"); + subst("CUBLASLT_EPILOGUE_DGELU_BGRAD", "HIPBLASLT_EPILOGUE_DGELU_BGRAD", "numeric_literal"); + subst("CUBLASLT_EPILOGUE_GELU", "HIPBLASLT_EPILOGUE_GELU", "numeric_literal"); + subst("CUBLASLT_EPILOGUE_GELU_AUX", "HIPBLASLT_EPILOGUE_GELU_AUX", "numeric_literal"); + subst("CUBLASLT_EPILOGUE_GELU_AUX_BIAS", "HIPBLASLT_EPILOGUE_GELU_AUX_BIAS", "numeric_literal"); + subst("CUBLASLT_EPILOGUE_GELU_BIAS", "HIPBLASLT_EPILOGUE_GELU_BIAS", "numeric_literal"); + subst("CUBLASLT_EPILOGUE_RELU", "HIPBLASLT_EPILOGUE_RELU", "numeric_literal"); + subst("CUBLASLT_EPILOGUE_RELU_BIAS", 
"HIPBLASLT_EPILOGUE_RELU_BIAS", "numeric_literal"); + subst("CUBLASLT_MATMUL_DESC_A_SCALE_POINTER", "HIPBLASLT_MATMUL_DESC_A_SCALE_POINTER", "numeric_literal"); + subst("CUBLASLT_MATMUL_DESC_BIAS_DATA_TYPE", "HIPBLASLT_MATMUL_DESC_BIAS_DATA_TYPE", "numeric_literal"); + subst("CUBLASLT_MATMUL_DESC_BIAS_POINTER", "HIPBLASLT_MATMUL_DESC_BIAS_POINTER", "numeric_literal"); + subst("CUBLASLT_MATMUL_DESC_B_SCALE_POINTER", "HIPBLASLT_MATMUL_DESC_B_SCALE_POINTER", "numeric_literal"); + subst("CUBLASLT_MATMUL_DESC_C_SCALE_POINTER", "HIPBLASLT_MATMUL_DESC_C_SCALE_POINTER", "numeric_literal"); + subst("CUBLASLT_MATMUL_DESC_D_SCALE_POINTER", "HIPBLASLT_MATMUL_DESC_D_SCALE_POINTER", "numeric_literal"); + subst("CUBLASLT_MATMUL_DESC_EPILOGUE", "HIPBLASLT_MATMUL_DESC_EPILOGUE", "numeric_literal"); + subst("CUBLASLT_MATMUL_DESC_EPILOGUE_AUX_BATCH_STRIDE", "HIPBLASLT_MATMUL_DESC_EPILOGUE_AUX_BATCH_STRIDE", "numeric_literal"); + subst("CUBLASLT_MATMUL_DESC_EPILOGUE_AUX_LD", "HIPBLASLT_MATMUL_DESC_EPILOGUE_AUX_LD", "numeric_literal"); + subst("CUBLASLT_MATMUL_DESC_EPILOGUE_AUX_POINTER", "HIPBLASLT_MATMUL_DESC_EPILOGUE_AUX_POINTER", "numeric_literal"); + subst("CUBLASLT_MATMUL_DESC_EPILOGUE_AUX_SCALE_POINTER", "HIPBLASLT_MATMUL_DESC_EPILOGUE_AUX_SCALE_POINTER", "numeric_literal"); + subst("CUBLASLT_MATMUL_DESC_POINTER_MODE", "HIPBLASLT_MATMUL_DESC_POINTER_MODE", "numeric_literal"); + subst("CUBLASLT_MATMUL_DESC_TRANSA", "HIPBLASLT_MATMUL_DESC_TRANSA", "numeric_literal"); + subst("CUBLASLT_MATMUL_DESC_TRANSB", "HIPBLASLT_MATMUL_DESC_TRANSB", "numeric_literal"); + subst("CUBLASLT_MATMUL_PREF_MAX_WORKSPACE_BYTES", "HIPBLASLT_MATMUL_PREF_MAX_WORKSPACE_BYTES", "numeric_literal"); + subst("CUBLASLT_MATMUL_PREF_SEARCH_MODE", "HIPBLASLT_MATMUL_PREF_SEARCH_MODE", "numeric_literal"); + subst("CUBLASLT_MATRIX_LAYOUT_BATCH_COUNT", "HIPBLASLT_MATRIX_LAYOUT_BATCH_COUNT", "numeric_literal"); + subst("CUBLASLT_MATRIX_LAYOUT_COLS", "HIPBLASLT_MATRIX_LAYOUT_COLS", "numeric_literal"); + subst("CUBLASLT_MATRIX_LAYOUT_LD", "HIPBLASLT_MATRIX_LAYOUT_LD", "numeric_literal"); + subst("CUBLASLT_MATRIX_LAYOUT_ORDER", "HIPBLASLT_MATRIX_LAYOUT_ORDER", "numeric_literal"); + subst("CUBLASLT_MATRIX_LAYOUT_ROWS", "HIPBLASLT_MATRIX_LAYOUT_ROWS", "numeric_literal"); + subst("CUBLASLT_MATRIX_LAYOUT_STRIDED_BATCH_OFFSET", "HIPBLASLT_MATRIX_LAYOUT_STRIDED_BATCH_OFFSET", "numeric_literal"); + subst("CUBLASLT_MATRIX_LAYOUT_TYPE", "HIPBLASLT_MATRIX_LAYOUT_TYPE", "numeric_literal"); + subst("CUBLASLT_MATRIX_TRANSFORM_DESC_POINTER_MODE", "HIPBLASLT_MATRIX_TRANSFORM_DESC_POINTER_MODE", "numeric_literal"); + subst("CUBLASLT_MATRIX_TRANSFORM_DESC_SCALE_TYPE", "HIPBLASLT_MATRIX_TRANSFORM_DESC_SCALE_TYPE", "numeric_literal"); + subst("CUBLASLT_MATRIX_TRANSFORM_DESC_TRANSA", "HIPBLASLT_MATRIX_TRANSFORM_DESC_TRANSA", "numeric_literal"); + subst("CUBLASLT_MATRIX_TRANSFORM_DESC_TRANSB", "HIPBLASLT_MATRIX_TRANSFORM_DESC_TRANSB", "numeric_literal"); + subst("CUBLASLT_ORDER_COL", "HIPBLASLT_ORDER_COL", "numeric_literal"); + subst("CUBLASLT_ORDER_ROW", "HIPBLASLT_ORDER_ROW", "numeric_literal"); + subst("CUBLASLT_POINTER_MODE_ALPHA_DEVICE_VECTOR_BETA_HOST", "HIPBLASLT_POINTER_MODE_ALPHA_DEVICE_VECTOR_BETA_HOST", "numeric_literal"); + subst("CUBLASLT_POINTER_MODE_DEVICE", "HIPBLASLT_POINTER_MODE_DEVICE", "numeric_literal"); + subst("CUBLASLT_POINTER_MODE_HOST", "HIPBLASLT_POINTER_MODE_HOST", "numeric_literal"); + subst("CUBLAS_ATOMICS_ALLOWED", "HIPBLAS_ATOMICS_ALLOWED", "numeric_literal"); + subst("CUBLAS_ATOMICS_NOT_ALLOWED", "HIPBLAS_ATOMICS_NOT_ALLOWED", 
"numeric_literal"); + subst("CUBLAS_COMPUTE_16F", "HIPBLAS_COMPUTE_16F", "numeric_literal"); + subst("CUBLAS_COMPUTE_16F_PEDANTIC", "HIPBLAS_COMPUTE_16F_PEDANTIC", "numeric_literal"); + subst("CUBLAS_COMPUTE_32F", "HIPBLAS_COMPUTE_32F", "numeric_literal"); + subst("CUBLAS_COMPUTE_32F_FAST_16BF", "HIPBLAS_COMPUTE_32F_FAST_16BF", "numeric_literal"); + subst("CUBLAS_COMPUTE_32F_FAST_16F", "HIPBLAS_COMPUTE_32F_FAST_16F", "numeric_literal"); + subst("CUBLAS_COMPUTE_32F_FAST_TF32", "HIPBLAS_COMPUTE_32F_FAST_TF32", "numeric_literal"); + subst("CUBLAS_COMPUTE_32F_PEDANTIC", "HIPBLAS_COMPUTE_32F_PEDANTIC", "numeric_literal"); + subst("CUBLAS_COMPUTE_32I", "HIPBLAS_COMPUTE_32I", "numeric_literal"); + subst("CUBLAS_COMPUTE_32I_PEDANTIC", "HIPBLAS_COMPUTE_32I_PEDANTIC", "numeric_literal"); + subst("CUBLAS_COMPUTE_64F", "HIPBLAS_COMPUTE_64F", "numeric_literal"); + subst("CUBLAS_COMPUTE_64F_PEDANTIC", "HIPBLAS_COMPUTE_64F_PEDANTIC", "numeric_literal"); + subst("CUBLAS_DEFAULT_MATH", "HIPBLAS_DEFAULT_MATH", "numeric_literal"); + subst("CUBLAS_DIAG_NON_UNIT", "HIPBLAS_DIAG_NON_UNIT", "numeric_literal"); + subst("CUBLAS_DIAG_UNIT", "HIPBLAS_DIAG_UNIT", "numeric_literal"); + subst("CUBLAS_FILL_MODE_FULL", "HIPBLAS_FILL_MODE_FULL", "numeric_literal"); + subst("CUBLAS_FILL_MODE_LOWER", "HIPBLAS_FILL_MODE_LOWER", "numeric_literal"); + subst("CUBLAS_FILL_MODE_UPPER", "HIPBLAS_FILL_MODE_UPPER", "numeric_literal"); + subst("CUBLAS_GEMM_DEFAULT", "HIPBLAS_GEMM_DEFAULT", "numeric_literal"); + subst("CUBLAS_GEMM_DFALT", "HIPBLAS_GEMM_DEFAULT", "numeric_literal"); + subst("CUBLAS_MATH_DISALLOW_REDUCED_PRECISION_REDUCTION", "HIPBLAS_MATH_DISALLOW_REDUCED_PRECISION_REDUCTION", "numeric_literal"); + subst("CUBLAS_OP_C", "HIPBLAS_OP_C", "numeric_literal"); + subst("CUBLAS_OP_HERMITAN", "HIPBLAS_OP_C", "numeric_literal"); + subst("CUBLAS_OP_N", "HIPBLAS_OP_N", "numeric_literal"); + subst("CUBLAS_OP_T", "HIPBLAS_OP_T", "numeric_literal"); + subst("CUBLAS_PEDANTIC_MATH", "HIPBLAS_PEDANTIC_MATH", "numeric_literal"); + subst("CUBLAS_POINTER_MODE_DEVICE", "HIPBLAS_POINTER_MODE_DEVICE", "numeric_literal"); + subst("CUBLAS_POINTER_MODE_HOST", "HIPBLAS_POINTER_MODE_HOST", "numeric_literal"); + subst("CUBLAS_SIDE_LEFT", "HIPBLAS_SIDE_LEFT", "numeric_literal"); + subst("CUBLAS_SIDE_RIGHT", "HIPBLAS_SIDE_RIGHT", "numeric_literal"); + subst("CUBLAS_STATUS_ALLOC_FAILED", "HIPBLAS_STATUS_ALLOC_FAILED", "numeric_literal"); + subst("CUBLAS_STATUS_ARCH_MISMATCH", "HIPBLAS_STATUS_ARCH_MISMATCH", "numeric_literal"); + subst("CUBLAS_STATUS_EXECUTION_FAILED", "HIPBLAS_STATUS_EXECUTION_FAILED", "numeric_literal"); + subst("CUBLAS_STATUS_INTERNAL_ERROR", "HIPBLAS_STATUS_INTERNAL_ERROR", "numeric_literal"); + subst("CUBLAS_STATUS_INVALID_VALUE", "HIPBLAS_STATUS_INVALID_VALUE", "numeric_literal"); + subst("CUBLAS_STATUS_LICENSE_ERROR", "HIPBLAS_STATUS_UNKNOWN", "numeric_literal"); + subst("CUBLAS_STATUS_MAPPING_ERROR", "HIPBLAS_STATUS_MAPPING_ERROR", "numeric_literal"); + subst("CUBLAS_STATUS_NOT_INITIALIZED", "HIPBLAS_STATUS_NOT_INITIALIZED", "numeric_literal"); + subst("CUBLAS_STATUS_NOT_SUPPORTED", "HIPBLAS_STATUS_NOT_SUPPORTED", "numeric_literal"); + subst("CUBLAS_STATUS_SUCCESS", "HIPBLAS_STATUS_SUCCESS", "numeric_literal"); + subst("CUBLAS_TENSOR_OP_MATH", "HIPBLAS_TENSOR_OP_MATH", "numeric_literal"); + subst("CUBLAS_TF32_TENSOR_OP_MATH", "HIPBLAS_TF32_TENSOR_OP_MATH", "numeric_literal"); + subst("CUDA_C_16BF", "HIP_C_16BF", "numeric_literal"); + subst("CUDA_C_16F", "HIP_C_16F", "numeric_literal"); + subst("CUDA_C_32F", "HIP_C_32F", 
"numeric_literal"); + subst("CUDA_C_32I", "HIP_C_32I", "numeric_literal"); + subst("CUDA_C_32U", "HIP_C_32U", "numeric_literal"); + subst("CUDA_C_64F", "HIP_C_64F", "numeric_literal"); + subst("CUDA_C_8I", "HIP_C_8I", "numeric_literal"); + subst("CUDA_C_8U", "HIP_C_8U", "numeric_literal"); + subst("CUDA_ERROR_ALREADY_ACQUIRED", "hipErrorAlreadyAcquired", "numeric_literal"); + subst("CUDA_ERROR_ALREADY_MAPPED", "hipErrorAlreadyMapped", "numeric_literal"); + subst("CUDA_ERROR_ARRAY_IS_MAPPED", "hipErrorArrayIsMapped", "numeric_literal"); + subst("CUDA_ERROR_ASSERT", "hipErrorAssert", "numeric_literal"); + subst("CUDA_ERROR_CAPTURED_EVENT", "hipErrorCapturedEvent", "numeric_literal"); + subst("CUDA_ERROR_CONTEXT_ALREADY_CURRENT", "hipErrorContextAlreadyCurrent", "numeric_literal"); + subst("CUDA_ERROR_CONTEXT_ALREADY_IN_USE", "hipErrorContextAlreadyInUse", "numeric_literal"); + subst("CUDA_ERROR_CONTEXT_IS_DESTROYED", "hipErrorContextIsDestroyed", "numeric_literal"); + subst("CUDA_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE", "hipErrorCooperativeLaunchTooLarge", "numeric_literal"); + subst("CUDA_ERROR_DEINITIALIZED", "hipErrorDeinitialized", "numeric_literal"); + subst("CUDA_ERROR_ECC_UNCORRECTABLE", "hipErrorECCNotCorrectable", "numeric_literal"); + subst("CUDA_ERROR_FILE_NOT_FOUND", "hipErrorFileNotFound", "numeric_literal"); + subst("CUDA_ERROR_GRAPH_EXEC_UPDATE_FAILURE", "hipErrorGraphExecUpdateFailure", "numeric_literal"); + subst("CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED", "hipErrorHostMemoryAlreadyRegistered", "numeric_literal"); + subst("CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED", "hipErrorHostMemoryNotRegistered", "numeric_literal"); + subst("CUDA_ERROR_ILLEGAL_ADDRESS", "hipErrorIllegalAddress", "numeric_literal"); + subst("CUDA_ERROR_ILLEGAL_STATE", "hipErrorIllegalState", "numeric_literal"); + subst("CUDA_ERROR_INVALID_CONTEXT", "hipErrorInvalidContext", "numeric_literal"); + subst("CUDA_ERROR_INVALID_DEVICE", "hipErrorInvalidDevice", "numeric_literal"); + subst("CUDA_ERROR_INVALID_GRAPHICS_CONTEXT", "hipErrorInvalidGraphicsContext", "numeric_literal"); + subst("CUDA_ERROR_INVALID_HANDLE", "hipErrorInvalidHandle", "numeric_literal"); + subst("CUDA_ERROR_INVALID_IMAGE", "hipErrorInvalidImage", "numeric_literal"); + subst("CUDA_ERROR_INVALID_PTX", "hipErrorInvalidKernelFile", "numeric_literal"); + subst("CUDA_ERROR_INVALID_SOURCE", "hipErrorInvalidSource", "numeric_literal"); + subst("CUDA_ERROR_INVALID_VALUE", "hipErrorInvalidValue", "numeric_literal"); + subst("CUDA_ERROR_LAUNCH_FAILED", "hipErrorLaunchFailure", "numeric_literal"); + subst("CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES", "hipErrorLaunchOutOfResources", "numeric_literal"); + subst("CUDA_ERROR_LAUNCH_TIMEOUT", "hipErrorLaunchTimeOut", "numeric_literal"); + subst("CUDA_ERROR_MAP_FAILED", "hipErrorMapFailed", "numeric_literal"); + subst("CUDA_ERROR_NOT_FOUND", "hipErrorNotFound", "numeric_literal"); + subst("CUDA_ERROR_NOT_INITIALIZED", "hipErrorNotInitialized", "numeric_literal"); + subst("CUDA_ERROR_NOT_MAPPED", "hipErrorNotMapped", "numeric_literal"); + subst("CUDA_ERROR_NOT_MAPPED_AS_ARRAY", "hipErrorNotMappedAsArray", "numeric_literal"); + subst("CUDA_ERROR_NOT_MAPPED_AS_POINTER", "hipErrorNotMappedAsPointer", "numeric_literal"); + subst("CUDA_ERROR_NOT_READY", "hipErrorNotReady", "numeric_literal"); + subst("CUDA_ERROR_NOT_SUPPORTED", "hipErrorNotSupported", "numeric_literal"); + subst("CUDA_ERROR_NO_BINARY_FOR_GPU", "hipErrorNoBinaryForGpu", "numeric_literal"); + subst("CUDA_ERROR_NO_DEVICE", "hipErrorNoDevice", "numeric_literal"); + 
subst("CUDA_ERROR_OPERATING_SYSTEM", "hipErrorOperatingSystem", "numeric_literal"); + subst("CUDA_ERROR_OUT_OF_MEMORY", "hipErrorOutOfMemory", "numeric_literal"); + subst("CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED", "hipErrorPeerAccessAlreadyEnabled", "numeric_literal"); + subst("CUDA_ERROR_PEER_ACCESS_NOT_ENABLED", "hipErrorPeerAccessNotEnabled", "numeric_literal"); + subst("CUDA_ERROR_PEER_ACCESS_UNSUPPORTED", "hipErrorPeerAccessUnsupported", "numeric_literal"); + subst("CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE", "hipErrorSetOnActiveProcess", "numeric_literal"); + subst("CUDA_ERROR_PROFILER_ALREADY_STARTED", "hipErrorProfilerAlreadyStarted", "numeric_literal"); + subst("CUDA_ERROR_PROFILER_ALREADY_STOPPED", "hipErrorProfilerAlreadyStopped", "numeric_literal"); + subst("CUDA_ERROR_PROFILER_DISABLED", "hipErrorProfilerDisabled", "numeric_literal"); + subst("CUDA_ERROR_PROFILER_NOT_INITIALIZED", "hipErrorProfilerNotInitialized", "numeric_literal"); + subst("CUDA_ERROR_SHARED_OBJECT_INIT_FAILED", "hipErrorSharedObjectInitFailed", "numeric_literal"); + subst("CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND", "hipErrorSharedObjectSymbolNotFound", "numeric_literal"); + subst("CUDA_ERROR_STREAM_CAPTURE_IMPLICIT", "hipErrorStreamCaptureImplicit", "numeric_literal"); + subst("CUDA_ERROR_STREAM_CAPTURE_INVALIDATED", "hipErrorStreamCaptureInvalidated", "numeric_literal"); + subst("CUDA_ERROR_STREAM_CAPTURE_ISOLATION", "hipErrorStreamCaptureIsolation", "numeric_literal"); + subst("CUDA_ERROR_STREAM_CAPTURE_MERGE", "hipErrorStreamCaptureMerge", "numeric_literal"); + subst("CUDA_ERROR_STREAM_CAPTURE_UNJOINED", "hipErrorStreamCaptureUnjoined", "numeric_literal"); + subst("CUDA_ERROR_STREAM_CAPTURE_UNMATCHED", "hipErrorStreamCaptureUnmatched", "numeric_literal"); + subst("CUDA_ERROR_STREAM_CAPTURE_UNSUPPORTED", "hipErrorStreamCaptureUnsupported", "numeric_literal"); + subst("CUDA_ERROR_STREAM_CAPTURE_WRONG_THREAD", "hipErrorStreamCaptureWrongThread", "numeric_literal"); + subst("CUDA_ERROR_UNKNOWN", "hipErrorUnknown", "numeric_literal"); + subst("CUDA_ERROR_UNMAP_FAILED", "hipErrorUnmapFailed", "numeric_literal"); + subst("CUDA_ERROR_UNSUPPORTED_LIMIT", "hipErrorUnsupportedLimit", "numeric_literal"); + subst("CUDA_GRAPH_INSTANTIATE_ERROR", "hipGraphInstantiateError", "numeric_literal"); + subst("CUDA_GRAPH_INSTANTIATE_FLAG_AUTO_FREE_ON_LAUNCH", "hipGraphInstantiateFlagAutoFreeOnLaunch", "numeric_literal"); + subst("CUDA_GRAPH_INSTANTIATE_FLAG_DEVICE_LAUNCH", "hipGraphInstantiateFlagDeviceLaunch", "numeric_literal"); + subst("CUDA_GRAPH_INSTANTIATE_FLAG_UPLOAD", "hipGraphInstantiateFlagUpload", "numeric_literal"); + subst("CUDA_GRAPH_INSTANTIATE_FLAG_USE_NODE_PRIORITY", "hipGraphInstantiateFlagUseNodePriority", "numeric_literal"); + subst("CUDA_GRAPH_INSTANTIATE_INVALID_STRUCTURE", "hipGraphInstantiateInvalidStructure", "numeric_literal"); + subst("CUDA_GRAPH_INSTANTIATE_MULTIPLE_CTXS_NOT_SUPPORTED", "hipGraphInstantiateMultipleDevicesNotSupported", "numeric_literal"); + subst("CUDA_GRAPH_INSTANTIATE_NODE_OPERATION_NOT_SUPPORTED", "hipGraphInstantiateNodeOperationNotSupported", "numeric_literal"); + subst("CUDA_GRAPH_INSTANTIATE_SUCCESS", "hipGraphInstantiateSuccess", "numeric_literal"); + subst("CUDA_R_16BF", "HIP_R_16BF", "numeric_literal"); + subst("CUDA_R_16F", "HIP_R_16F", "numeric_literal"); + subst("CUDA_R_32F", "HIP_R_32F", "numeric_literal"); + subst("CUDA_R_32I", "HIP_R_32I", "numeric_literal"); + subst("CUDA_R_32U", "HIP_R_32U", "numeric_literal"); + subst("CUDA_R_64F", "HIP_R_64F", "numeric_literal"); + 
subst("CUDA_R_8I", "HIP_R_8I", "numeric_literal"); + subst("CUDA_R_8U", "HIP_R_8U", "numeric_literal"); + subst("CUDA_SUCCESS", "hipSuccess", "numeric_literal"); + subst("CUDNN_16BIT_INDICES", "HIPDNN_16BIT_INDICES", "numeric_literal"); + subst("CUDNN_32BIT_INDICES", "HIPDNN_32BIT_INDICES", "numeric_literal"); + subst("CUDNN_64BIT_INDICES", "HIPDNN_64BIT_INDICES", "numeric_literal"); + subst("CUDNN_8BIT_INDICES", "HIPDNN_8BIT_INDICES", "numeric_literal"); + subst("CUDNN_ACTIVATION_CLIPPED_RELU", "HIPDNN_ACTIVATION_CLIPPED_RELU", "numeric_literal"); + subst("CUDNN_ACTIVATION_ELU", "HIPDNN_ACTIVATION_ELU", "numeric_literal"); + subst("CUDNN_ACTIVATION_IDENTITY", "HIPDNN_ACTIVATION_PATHTRU", "numeric_literal"); + subst("CUDNN_ACTIVATION_RELU", "HIPDNN_ACTIVATION_RELU", "numeric_literal"); + subst("CUDNN_ACTIVATION_SIGMOID", "HIPDNN_ACTIVATION_SIGMOID", "numeric_literal"); + subst("CUDNN_ACTIVATION_SWISH", "HIPDNN_ACTIVATION_SWISH", "numeric_literal"); + subst("CUDNN_ACTIVATION_TANH", "HIPDNN_ACTIVATION_TANH", "numeric_literal"); + subst("CUDNN_BATCHNORM_PER_ACTIVATION", "HIPDNN_BATCHNORM_PER_ACTIVATION", "numeric_literal"); + subst("CUDNN_BATCHNORM_SPATIAL", "HIPDNN_BATCHNORM_SPATIAL", "numeric_literal"); + subst("CUDNN_BATCHNORM_SPATIAL_PERSISTENT", "HIPDNN_BATCHNORM_SPATIAL_PERSISTENT", "numeric_literal"); + subst("CUDNN_BIDIRECTIONAL", "HIPDNN_BIDIRECTIONAL", "numeric_literal"); + subst("CUDNN_BN_MIN_EPSILON", "HIPDNN_BN_MIN_EPSILON", "numeric_literal"); + subst("CUDNN_CONVOLUTION", "HIPDNN_CONVOLUTION", "numeric_literal"); + subst("CUDNN_CONVOLUTION_BWD_DATA_ALGO_0", "HIPDNN_CONVOLUTION_BWD_DATA_ALGO_0", "numeric_literal"); + subst("CUDNN_CONVOLUTION_BWD_DATA_ALGO_1", "HIPDNN_CONVOLUTION_BWD_DATA_ALGO_1", "numeric_literal"); + subst("CUDNN_CONVOLUTION_BWD_DATA_ALGO_COUNT", "HIPDNN_CONVOLUTION_BWD_DATA_ALGO_TRANSPOSE_GEMM", "numeric_literal"); + subst("CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT", "HIPDNN_CONVOLUTION_BWD_DATA_ALGO_FFT", "numeric_literal"); + subst("CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT_TILING", "HIPDNN_CONVOLUTION_BWD_DATA_ALGO_FFT_TILING", "numeric_literal"); + subst("CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD", "HIPDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD", "numeric_literal"); + subst("CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD_NONFUSED", "HIPDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD_NONFUSED", "numeric_literal"); + subst("CUDNN_CONVOLUTION_BWD_DATA_NO_WORKSPACE", "HIPDNN_CONVOLUTION_BWD_DATA_NO_WORKSPACE", "numeric_literal"); + subst("CUDNN_CONVOLUTION_BWD_DATA_PREFER_FASTEST", "HIPDNN_CONVOLUTION_BWD_DATA_PREFER_FASTEST", "numeric_literal"); + subst("CUDNN_CONVOLUTION_BWD_DATA_SPECIFY_WORKSPACE_LIMIT", "HIPDNN_CONVOLUTION_BWD_DATA_SPECIFY_WORKSPACE_LIMIT", "numeric_literal"); + subst("CUDNN_CONVOLUTION_BWD_FILTER_ALGO_0", "HIPDNN_CONVOLUTION_BWD_FILTER_ALGO_0", "numeric_literal"); + subst("CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1", "HIPDNN_CONVOLUTION_BWD_FILTER_ALGO_1", "numeric_literal"); + subst("CUDNN_CONVOLUTION_BWD_FILTER_ALGO_3", "HIPDNN_CONVOLUTION_BWD_FILTER_ALGO_3", "numeric_literal"); + subst("CUDNN_CONVOLUTION_BWD_FILTER_ALGO_COUNT", "HIPDNN_CONVOLUTION_BWD_FILTER_ALGO_COUNT", "numeric_literal"); + subst("CUDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT", "HIPDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT", "numeric_literal"); + subst("CUDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT_TILING", "HIPDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT_TILING", "numeric_literal"); + subst("CUDNN_CONVOLUTION_BWD_FILTER_ALGO_WINOGRAD", "HIPDNN_CONVOLUTION_BWD_FILTER_ALGO_WINOGRAD", "numeric_literal"); + 
subst("CUDNN_CONVOLUTION_BWD_FILTER_ALGO_WINOGRAD_NONFUSED", "HIPDNN_CONVOLUTION_BWD_FILTER_ALGO_WINOGRAD_NONFUSED", "numeric_literal"); + subst("CUDNN_CONVOLUTION_BWD_FILTER_NO_WORKSPACE", "HIPDNN_CONVOLUTION_BWD_FILTER_NO_WORKSPACE", "numeric_literal"); + subst("CUDNN_CONVOLUTION_BWD_FILTER_PREFER_FASTEST", "HIPDNN_CONVOLUTION_BWD_FILTER_PREFER_FASTEST", "numeric_literal"); + subst("CUDNN_CONVOLUTION_BWD_FILTER_SPECIFY_WORKSPACE_LIMIT", "HIPDNN_CONVOLUTION_BWD_FILTER_SPECIFY_WORKSPACE_LIMIT", "numeric_literal"); + subst("CUDNN_CONVOLUTION_FWD_ALGO_COUNT", "HIPDNN_CONVOLUTION_FWD_ALGO_COUNT", "numeric_literal"); + subst("CUDNN_CONVOLUTION_FWD_ALGO_DIRECT", "HIPDNN_CONVOLUTION_FWD_ALGO_DIRECT", "numeric_literal"); + subst("CUDNN_CONVOLUTION_FWD_ALGO_FFT", "HIPDNN_CONVOLUTION_FWD_ALGO_FFT", "numeric_literal"); + subst("CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING", "HIPDNN_CONVOLUTION_FWD_ALGO_FFT_TILING", "numeric_literal"); + subst("CUDNN_CONVOLUTION_FWD_ALGO_GEMM", "HIPDNN_CONVOLUTION_FWD_ALGO_GEMM", "numeric_literal"); + subst("CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM", "HIPDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM", "numeric_literal"); + subst("CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM", "HIPDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM", "numeric_literal"); + subst("CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD", "HIPDNN_CONVOLUTION_FWD_ALGO_WINOGRAD", "numeric_literal"); + subst("CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD_NONFUSED", "HIPDNN_CONVOLUTION_FWD_ALGO_WINOGRAD_NONFUSED", "numeric_literal"); + subst("CUDNN_CONVOLUTION_FWD_NO_WORKSPACE", "HIPDNN_CONVOLUTION_FWD_NO_WORKSPACE", "numeric_literal"); + subst("CUDNN_CONVOLUTION_FWD_PREFER_FASTEST", "HIPDNN_CONVOLUTION_FWD_PREFER_FASTEST", "numeric_literal"); + subst("CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT", "HIPDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT", "numeric_literal"); + subst("CUDNN_CROSS_CORRELATION", "HIPDNN_CROSS_CORRELATION", "numeric_literal"); + subst("CUDNN_DATA_DOUBLE", "HIPDNN_DATA_DOUBLE", "numeric_literal"); + subst("CUDNN_DATA_FLOAT", "HIPDNN_DATA_FLOAT", "numeric_literal"); + subst("CUDNN_DATA_HALF", "HIPDNN_DATA_HALF", "numeric_literal"); + subst("CUDNN_DATA_INT32", "HIPDNN_DATA_INT32", "numeric_literal"); + subst("CUDNN_DATA_INT8", "HIPDNN_DATA_INT8", "numeric_literal"); + subst("CUDNN_DATA_INT8x4", "HIPDNN_DATA_INT8x4", "numeric_literal"); + subst("CUDNN_DEFAULT_MATH", "HIPDNN_DEFAULT_MATH", "numeric_literal"); + subst("CUDNN_GRU", "HIPDNN_GRU", "numeric_literal"); + subst("CUDNN_LINEAR_INPUT", "HIPDNN_LINEAR_INPUT", "numeric_literal"); + subst("CUDNN_LRN_CROSS_CHANNEL_DIM1", "HIPDNN_LRN_CROSS_CHANNEL", "numeric_literal"); + subst("CUDNN_LSTM", "HIPDNN_LSTM", "numeric_literal"); + subst("CUDNN_NOT_PROPAGATE_NAN", "HIPDNN_NOT_PROPAGATE_NAN", "numeric_literal"); + subst("CUDNN_OP_TENSOR_ADD", "HIPDNN_OP_TENSOR_ADD", "numeric_literal"); + subst("CUDNN_OP_TENSOR_MAX", "HIPDNN_OP_TENSOR_MAX", "numeric_literal"); + subst("CUDNN_OP_TENSOR_MIN", "HIPDNN_OP_TENSOR_MIN", "numeric_literal"); + subst("CUDNN_OP_TENSOR_MUL", "HIPDNN_OP_TENSOR_MUL", "numeric_literal"); + subst("CUDNN_OP_TENSOR_SQRT", "HIPDNN_OP_TENSOR_SQRT", "numeric_literal"); + subst("CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING", "HIPDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING", "numeric_literal"); + subst("CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING", "HIPDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING", "numeric_literal"); + subst("CUDNN_POOLING_MAX", "HIPDNN_POOLING_MAX", "numeric_literal"); + subst("CUDNN_POOLING_MAX_DETERMINISTIC", "HIPDNN_POOLING_MAX_DETERMINISTIC", 
"numeric_literal"); + subst("CUDNN_PROPAGATE_NAN", "HIPDNN_PROPAGATE_NAN", "numeric_literal"); + subst("CUDNN_REDUCE_TENSOR_ADD", "HIPDNN_REDUCE_TENSOR_ADD", "numeric_literal"); + subst("CUDNN_REDUCE_TENSOR_AMAX", "HIPDNN_REDUCE_TENSOR_AMAX", "numeric_literal"); + subst("CUDNN_REDUCE_TENSOR_AVG", "HIPDNN_REDUCE_TENSOR_AVG", "numeric_literal"); + subst("CUDNN_REDUCE_TENSOR_FLATTENED_INDICES", "HIPDNN_REDUCE_TENSOR_FLATTENED_INDICES", "numeric_literal"); + subst("CUDNN_REDUCE_TENSOR_MAX", "HIPDNN_REDUCE_TENSOR_MAX", "numeric_literal"); + subst("CUDNN_REDUCE_TENSOR_MIN", "HIPDNN_REDUCE_TENSOR_MIN", "numeric_literal"); + subst("CUDNN_REDUCE_TENSOR_MUL", "HIPDNN_REDUCE_TENSOR_MUL", "numeric_literal"); + subst("CUDNN_REDUCE_TENSOR_MUL_NO_ZEROS", "HIPDNN_REDUCE_TENSOR_MUL_NO_ZEROS", "numeric_literal"); + subst("CUDNN_REDUCE_TENSOR_NORM1", "HIPDNN_REDUCE_TENSOR_NORM1", "numeric_literal"); + subst("CUDNN_REDUCE_TENSOR_NORM2", "HIPDNN_REDUCE_TENSOR_NORM2", "numeric_literal"); + subst("CUDNN_REDUCE_TENSOR_NO_INDICES", "HIPDNN_REDUCE_TENSOR_NO_INDICES", "numeric_literal"); + subst("CUDNN_RNN_ALGO_PERSIST_DYNAMIC", "HIPDNN_RNN_ALGO_PERSIST_DYNAMIC", "numeric_literal"); + subst("CUDNN_RNN_ALGO_PERSIST_STATIC", "HIPDNN_RNN_ALGO_PERSIST_STATIC", "numeric_literal"); + subst("CUDNN_RNN_ALGO_PERSIST_STATIC_SMALL_H", "HIPDNN_RNN_ALGO_PERSIST_STATIC_SMALL_H", "numeric_literal"); + subst("CUDNN_RNN_ALGO_STANDARD", "HIPDNN_RNN_ALGO_STANDARD", "numeric_literal"); + subst("CUDNN_RNN_DOUBLE_BIAS", "HIPDNN_RNN_WITH_BIAS", "numeric_literal"); + subst("CUDNN_RNN_NO_BIAS", "HIPDNN_RNN_NO_BIAS", "numeric_literal"); + subst("CUDNN_RNN_RELU", "HIPDNN_RNN_RELU", "numeric_literal"); + subst("CUDNN_RNN_SINGLE_INP_BIAS", "HIPDNN_RNN_WITH_BIAS", "numeric_literal"); + subst("CUDNN_RNN_SINGLE_REC_BIAS", "HIPDNN_RNN_WITH_BIAS", "numeric_literal"); + subst("CUDNN_RNN_TANH", "HIPDNN_RNN_TANH", "numeric_literal"); + subst("CUDNN_SKIP_INPUT", "HIPDNN_SKIP_INPUT", "numeric_literal"); + subst("CUDNN_SOFTMAX_ACCURATE", "HIPDNN_SOFTMAX_ACCURATE", "numeric_literal"); + subst("CUDNN_SOFTMAX_FAST", "HIPDNN_SOFTMAX_FAST", "numeric_literal"); + subst("CUDNN_SOFTMAX_LOG", "HIPDNN_SOFTMAX_LOG", "numeric_literal"); + subst("CUDNN_SOFTMAX_MODE_CHANNEL", "HIPDNN_SOFTMAX_MODE_CHANNEL", "numeric_literal"); + subst("CUDNN_SOFTMAX_MODE_INSTANCE", "HIPDNN_SOFTMAX_MODE_INSTANCE", "numeric_literal"); + subst("CUDNN_STATUS_ALLOC_FAILED", "HIPDNN_STATUS_ALLOC_FAILED", "numeric_literal"); + subst("CUDNN_STATUS_ARCH_MISMATCH", "HIPDNN_STATUS_ARCH_MISMATCH", "numeric_literal"); + subst("CUDNN_STATUS_BAD_PARAM", "HIPDNN_STATUS_BAD_PARAM", "numeric_literal"); + subst("CUDNN_STATUS_EXECUTION_FAILED", "HIPDNN_STATUS_EXECUTION_FAILED", "numeric_literal"); + subst("CUDNN_STATUS_INTERNAL_ERROR", "HIPDNN_STATUS_INTERNAL_ERROR", "numeric_literal"); + subst("CUDNN_STATUS_INVALID_VALUE", "HIPDNN_STATUS_INVALID_VALUE", "numeric_literal"); + subst("CUDNN_STATUS_LICENSE_ERROR", "HIPDNN_STATUS_LICENSE_ERROR", "numeric_literal"); + subst("CUDNN_STATUS_MAPPING_ERROR", "HIPDNN_STATUS_MAPPING_ERROR", "numeric_literal"); + subst("CUDNN_STATUS_NOT_INITIALIZED", "HIPDNN_STATUS_NOT_INITIALIZED", "numeric_literal"); + subst("CUDNN_STATUS_NOT_SUPPORTED", "HIPDNN_STATUS_NOT_SUPPORTED", "numeric_literal"); + subst("CUDNN_STATUS_RUNTIME_PREREQUISITE_MISSING", "HIPDNN_STATUS_RUNTIME_PREREQUISITE_MISSING", "numeric_literal"); + subst("CUDNN_STATUS_SUCCESS", "HIPDNN_STATUS_SUCCESS", "numeric_literal"); + subst("CUDNN_TENSOR_NCHW", "HIPDNN_TENSOR_NCHW", "numeric_literal"); + 
subst("CUDNN_TENSOR_NCHW_VECT_C", "HIPDNN_TENSOR_NCHW_VECT_C", "numeric_literal"); + subst("CUDNN_TENSOR_NHWC", "HIPDNN_TENSOR_NHWC", "numeric_literal"); + subst("CUDNN_TENSOR_OP_MATH", "HIPDNN_TENSOR_OP_MATH", "numeric_literal"); + subst("CUDNN_UNIDIRECTIONAL", "HIPDNN_UNIDIRECTIONAL", "numeric_literal"); + subst("CUFFT_ALLOC_FAILED", "HIPFFT_ALLOC_FAILED", "numeric_literal"); + subst("CUFFT_C2C", "HIPFFT_C2C", "numeric_literal"); + subst("CUFFT_C2R", "HIPFFT_C2R", "numeric_literal"); + subst("CUFFT_CB_LD_COMPLEX", "HIPFFT_CB_LD_COMPLEX", "numeric_literal"); + subst("CUFFT_CB_LD_COMPLEX_DOUBLE", "HIPFFT_CB_LD_COMPLEX_DOUBLE", "numeric_literal"); + subst("CUFFT_CB_LD_REAL", "HIPFFT_CB_LD_REAL", "numeric_literal"); + subst("CUFFT_CB_LD_REAL_DOUBLE", "HIPFFT_CB_LD_REAL_DOUBLE", "numeric_literal"); + subst("CUFFT_CB_ST_COMPLEX", "HIPFFT_CB_ST_COMPLEX", "numeric_literal"); + subst("CUFFT_CB_ST_COMPLEX_DOUBLE", "HIPFFT_CB_ST_COMPLEX_DOUBLE", "numeric_literal"); + subst("CUFFT_CB_ST_REAL", "HIPFFT_CB_ST_REAL", "numeric_literal"); + subst("CUFFT_CB_ST_REAL_DOUBLE", "HIPFFT_CB_ST_REAL_DOUBLE", "numeric_literal"); + subst("CUFFT_CB_UNDEFINED", "HIPFFT_CB_UNDEFINED", "numeric_literal"); + subst("CUFFT_D2Z", "HIPFFT_D2Z", "numeric_literal"); + subst("CUFFT_EXEC_FAILED", "HIPFFT_EXEC_FAILED", "numeric_literal"); + subst("CUFFT_FORWARD", "HIPFFT_FORWARD", "numeric_literal"); + subst("CUFFT_INCOMPLETE_PARAMETER_LIST", "HIPFFT_INCOMPLETE_PARAMETER_LIST", "numeric_literal"); + subst("CUFFT_INTERNAL_ERROR", "HIPFFT_INTERNAL_ERROR", "numeric_literal"); + subst("CUFFT_INVALID_DEVICE", "HIPFFT_INVALID_DEVICE", "numeric_literal"); + subst("CUFFT_INVALID_PLAN", "HIPFFT_INVALID_PLAN", "numeric_literal"); + subst("CUFFT_INVALID_SIZE", "HIPFFT_INVALID_SIZE", "numeric_literal"); + subst("CUFFT_INVALID_TYPE", "HIPFFT_INVALID_TYPE", "numeric_literal"); + subst("CUFFT_INVALID_VALUE", "HIPFFT_INVALID_VALUE", "numeric_literal"); + subst("CUFFT_INVERSE", "HIPFFT_BACKWARD", "numeric_literal"); + subst("CUFFT_NOT_IMPLEMENTED", "HIPFFT_NOT_IMPLEMENTED", "numeric_literal"); + subst("CUFFT_NOT_SUPPORTED", "HIPFFT_NOT_SUPPORTED", "numeric_literal"); + subst("CUFFT_NO_WORKSPACE", "HIPFFT_NO_WORKSPACE", "numeric_literal"); + subst("CUFFT_PARSE_ERROR", "HIPFFT_PARSE_ERROR", "numeric_literal"); + subst("CUFFT_R2C", "HIPFFT_R2C", "numeric_literal"); + subst("CUFFT_SETUP_FAILED", "HIPFFT_SETUP_FAILED", "numeric_literal"); + subst("CUFFT_SUCCESS", "HIPFFT_SUCCESS", "numeric_literal"); + subst("CUFFT_UNALIGNED_DATA", "HIPFFT_UNALIGNED_DATA", "numeric_literal"); + subst("CUFFT_Z2D", "HIPFFT_Z2D", "numeric_literal"); + subst("CUFFT_Z2Z", "HIPFFT_Z2Z", "numeric_literal"); + subst("CURAND_DIRECTION_VECTORS_32_JOEKUO6", "HIPRAND_DIRECTION_VECTORS_32_JOEKUO6", "numeric_literal"); + subst("CURAND_DIRECTION_VECTORS_64_JOEKUO6", "HIPRAND_DIRECTION_VECTORS_64_JOEKUO6", "numeric_literal"); + subst("CURAND_RNG_PSEUDO_DEFAULT", "HIPRAND_RNG_PSEUDO_DEFAULT", "numeric_literal"); + subst("CURAND_RNG_PSEUDO_MRG32K3A", "HIPRAND_RNG_PSEUDO_MRG32K3A", "numeric_literal"); + subst("CURAND_RNG_PSEUDO_MT19937", "HIPRAND_RNG_PSEUDO_MT19937", "numeric_literal"); + subst("CURAND_RNG_PSEUDO_MTGP32", "HIPRAND_RNG_PSEUDO_MTGP32", "numeric_literal"); + subst("CURAND_RNG_PSEUDO_PHILOX4_32_10", "HIPRAND_RNG_PSEUDO_PHILOX4_32_10", "numeric_literal"); + subst("CURAND_RNG_PSEUDO_XORWOW", "HIPRAND_RNG_PSEUDO_XORWOW", "numeric_literal"); + subst("CURAND_RNG_QUASI_DEFAULT", "HIPRAND_RNG_QUASI_DEFAULT", "numeric_literal"); + subst("CURAND_RNG_QUASI_SCRAMBLED_SOBOL32", 
"HIPRAND_RNG_QUASI_SCRAMBLED_SOBOL32", "numeric_literal"); + subst("CURAND_RNG_QUASI_SCRAMBLED_SOBOL64", "HIPRAND_RNG_QUASI_SCRAMBLED_SOBOL64", "numeric_literal"); + subst("CURAND_RNG_QUASI_SOBOL32", "HIPRAND_RNG_QUASI_SOBOL32", "numeric_literal"); + subst("CURAND_RNG_QUASI_SOBOL64", "HIPRAND_RNG_QUASI_SOBOL64", "numeric_literal"); + subst("CURAND_RNG_TEST", "HIPRAND_RNG_TEST", "numeric_literal"); + subst("CURAND_SCRAMBLED_DIRECTION_VECTORS_32_JOEKUO6", "HIPRAND_SCRAMBLED_DIRECTION_VECTORS_32_JOEKUO6", "numeric_literal"); + subst("CURAND_SCRAMBLED_DIRECTION_VECTORS_64_JOEKUO6", "HIPRAND_SCRAMBLED_DIRECTION_VECTORS_64_JOEKUO6", "numeric_literal"); + subst("CURAND_STATUS_ALLOCATION_FAILED", "HIPRAND_STATUS_ALLOCATION_FAILED", "numeric_literal"); + subst("CURAND_STATUS_ARCH_MISMATCH", "HIPRAND_STATUS_ARCH_MISMATCH", "numeric_literal"); + subst("CURAND_STATUS_DOUBLE_PRECISION_REQUIRED", "HIPRAND_STATUS_DOUBLE_PRECISION_REQUIRED", "numeric_literal"); + subst("CURAND_STATUS_INITIALIZATION_FAILED", "HIPRAND_STATUS_INITIALIZATION_FAILED", "numeric_literal"); + subst("CURAND_STATUS_INTERNAL_ERROR", "HIPRAND_STATUS_INTERNAL_ERROR", "numeric_literal"); + subst("CURAND_STATUS_LAUNCH_FAILURE", "HIPRAND_STATUS_LAUNCH_FAILURE", "numeric_literal"); + subst("CURAND_STATUS_LENGTH_NOT_MULTIPLE", "HIPRAND_STATUS_LENGTH_NOT_MULTIPLE", "numeric_literal"); + subst("CURAND_STATUS_NOT_INITIALIZED", "HIPRAND_STATUS_NOT_INITIALIZED", "numeric_literal"); + subst("CURAND_STATUS_OUT_OF_RANGE", "HIPRAND_STATUS_OUT_OF_RANGE", "numeric_literal"); + subst("CURAND_STATUS_PREEXISTING_FAILURE", "HIPRAND_STATUS_PREEXISTING_FAILURE", "numeric_literal"); + subst("CURAND_STATUS_SUCCESS", "HIPRAND_STATUS_SUCCESS", "numeric_literal"); + subst("CURAND_STATUS_TYPE_ERROR", "HIPRAND_STATUS_TYPE_ERROR", "numeric_literal"); + subst("CURAND_STATUS_VERSION_MISMATCH", "HIPRAND_STATUS_VERSION_MISMATCH", "numeric_literal"); + subst("CUSOLVERRF_FACTORIZATION_ALG0", "HIPSOLVERRF_FACTORIZATION_ALG0", "numeric_literal"); + subst("CUSOLVERRF_FACTORIZATION_ALG1", "HIPSOLVERRF_FACTORIZATION_ALG1", "numeric_literal"); + subst("CUSOLVERRF_FACTORIZATION_ALG2", "HIPSOLVERRF_FACTORIZATION_ALG2", "numeric_literal"); + subst("CUSOLVERRF_MATRIX_FORMAT_CSC", "HIPSOLVERRF_MATRIX_FORMAT_CSC", "numeric_literal"); + subst("CUSOLVERRF_MATRIX_FORMAT_CSR", "HIPSOLVERRF_MATRIX_FORMAT_CSR", "numeric_literal"); + subst("CUSOLVERRF_NUMERIC_BOOST_NOT_USED", "HIPSOLVERRF_NUMERIC_BOOST_NOT_USED", "numeric_literal"); + subst("CUSOLVERRF_NUMERIC_BOOST_USED", "HIPSOLVERRF_NUMERIC_BOOST_USED", "numeric_literal"); + subst("CUSOLVERRF_RESET_VALUES_FAST_MODE_OFF", "HIPSOLVERRF_RESET_VALUES_FAST_MODE_OFF", "numeric_literal"); + subst("CUSOLVERRF_RESET_VALUES_FAST_MODE_ON", "HIPSOLVERRF_RESET_VALUES_FAST_MODE_ON", "numeric_literal"); + subst("CUSOLVERRF_TRIANGULAR_SOLVE_ALG1", "HIPSOLVERRF_TRIANGULAR_SOLVE_ALG1", "numeric_literal"); + subst("CUSOLVERRF_TRIANGULAR_SOLVE_ALG2", "HIPSOLVERRF_TRIANGULAR_SOLVE_ALG2", "numeric_literal"); + subst("CUSOLVERRF_TRIANGULAR_SOLVE_ALG3", "HIPSOLVERRF_TRIANGULAR_SOLVE_ALG3", "numeric_literal"); + subst("CUSOLVERRF_UNIT_DIAGONAL_ASSUMED_L", "HIPSOLVERRF_UNIT_DIAGONAL_ASSUMED_L", "numeric_literal"); + subst("CUSOLVERRF_UNIT_DIAGONAL_ASSUMED_U", "HIPSOLVERRF_UNIT_DIAGONAL_ASSUMED_U", "numeric_literal"); + subst("CUSOLVERRF_UNIT_DIAGONAL_STORED_L", "HIPSOLVERRF_UNIT_DIAGONAL_STORED_L", "numeric_literal"); + subst("CUSOLVERRF_UNIT_DIAGONAL_STORED_U", "HIPSOLVERRF_UNIT_DIAGONAL_STORED_U", "numeric_literal"); + subst("CUSOLVER_EIG_MODE_NOVECTOR", 
"HIPSOLVER_EIG_MODE_NOVECTOR", "numeric_literal"); + subst("CUSOLVER_EIG_MODE_VECTOR", "HIPSOLVER_EIG_MODE_VECTOR", "numeric_literal"); + subst("CUSOLVER_EIG_RANGE_ALL", "HIPSOLVER_EIG_RANGE_ALL", "numeric_literal"); + subst("CUSOLVER_EIG_RANGE_I", "HIPSOLVER_EIG_RANGE_I", "numeric_literal"); + subst("CUSOLVER_EIG_RANGE_V", "HIPSOLVER_EIG_RANGE_V", "numeric_literal"); + subst("CUSOLVER_EIG_TYPE_1", "HIPSOLVER_EIG_TYPE_1", "numeric_literal"); + subst("CUSOLVER_EIG_TYPE_2", "HIPSOLVER_EIG_TYPE_2", "numeric_literal"); + subst("CUSOLVER_EIG_TYPE_3", "HIPSOLVER_EIG_TYPE_3", "numeric_literal"); + subst("CUSOLVER_STATUS_ALLOC_FAILED", "HIPSOLVER_STATUS_ALLOC_FAILED", "numeric_literal"); + subst("CUSOLVER_STATUS_ARCH_MISMATCH", "HIPSOLVER_STATUS_ARCH_MISMATCH", "numeric_literal"); + subst("CUSOLVER_STATUS_EXECUTION_FAILED", "HIPSOLVER_STATUS_EXECUTION_FAILED", "numeric_literal"); + subst("CUSOLVER_STATUS_INTERNAL_ERROR", "HIPSOLVER_STATUS_INTERNAL_ERROR", "numeric_literal"); + subst("CUSOLVER_STATUS_INVALID_VALUE", "HIPSOLVER_STATUS_INVALID_VALUE", "numeric_literal"); + subst("CUSOLVER_STATUS_IRS_INTERNAL_ERROR", "HIPSOLVER_STATUS_INTERNAL_ERROR", "numeric_literal"); + subst("CUSOLVER_STATUS_IRS_NOT_SUPPORTED", "HIPSOLVER_STATUS_NOT_SUPPORTED", "numeric_literal"); + subst("CUSOLVER_STATUS_IRS_PARAMS_INVALID", "HIPSOLVER_STATUS_INVALID_VALUE", "numeric_literal"); + subst("CUSOLVER_STATUS_MAPPING_ERROR", "HIPSOLVER_STATUS_MAPPING_ERROR", "numeric_literal"); + subst("CUSOLVER_STATUS_MATRIX_TYPE_NOT_SUPPORTED", "HIPSOLVER_STATUS_MATRIX_TYPE_NOT_SUPPORTED", "numeric_literal"); + subst("CUSOLVER_STATUS_NOT_INITIALIZED", "HIPSOLVER_STATUS_NOT_INITIALIZED", "numeric_literal"); + subst("CUSOLVER_STATUS_NOT_SUPPORTED", "HIPSOLVER_STATUS_NOT_SUPPORTED", "numeric_literal"); + subst("CUSOLVER_STATUS_SUCCESS", "HIPSOLVER_STATUS_SUCCESS", "numeric_literal"); + subst("CUSOLVER_STATUS_ZERO_PIVOT", "HIPSOLVER_STATUS_ZERO_PIVOT", "numeric_literal"); + subst("CUSPARSE_ACTION_NUMERIC", "HIPSPARSE_ACTION_NUMERIC", "numeric_literal"); + subst("CUSPARSE_ACTION_SYMBOLIC", "HIPSPARSE_ACTION_SYMBOLIC", "numeric_literal"); + subst("CUSPARSE_COOMM_ALG1", "HIPSPARSE_COOMM_ALG1", "numeric_literal"); + subst("CUSPARSE_COOMM_ALG2", "HIPSPARSE_COOMM_ALG2", "numeric_literal"); + subst("CUSPARSE_COOMM_ALG3", "HIPSPARSE_COOMM_ALG3", "numeric_literal"); + subst("CUSPARSE_COOMV_ALG", "HIPSPARSE_COOMV_ALG", "numeric_literal"); + subst("CUSPARSE_CSR2CSC_ALG1", "HIPSPARSE_CSR2CSC_ALG1", "numeric_literal"); + subst("CUSPARSE_CSR2CSC_ALG2", "HIPSPARSE_CSR2CSC_ALG2", "numeric_literal"); + subst("CUSPARSE_CSR2CSC_ALG_DEFAULT", "HIPSPARSE_CSR2CSC_ALG_DEFAULT", "numeric_literal"); + subst("CUSPARSE_CSRMM_ALG1", "HIPSPARSE_CSRMM_ALG1", "numeric_literal"); + subst("CUSPARSE_CSRMV_ALG1", "HIPSPARSE_CSRMV_ALG1", "numeric_literal"); + subst("CUSPARSE_CSRMV_ALG2", "HIPSPARSE_CSRMV_ALG2", "numeric_literal"); + subst("CUSPARSE_DENSETOSPARSE_ALG_DEFAULT", "HIPSPARSE_DENSETOSPARSE_ALG_DEFAULT", "numeric_literal"); + subst("CUSPARSE_DIAG_TYPE_NON_UNIT", "HIPSPARSE_DIAG_TYPE_NON_UNIT", "numeric_literal"); + subst("CUSPARSE_DIAG_TYPE_UNIT", "HIPSPARSE_DIAG_TYPE_UNIT", "numeric_literal"); + subst("CUSPARSE_DIRECTION_COLUMN", "HIPSPARSE_DIRECTION_COLUMN", "numeric_literal"); + subst("CUSPARSE_DIRECTION_ROW", "HIPSPARSE_DIRECTION_ROW", "numeric_literal"); + subst("CUSPARSE_FILL_MODE_LOWER", "HIPSPARSE_FILL_MODE_LOWER", "numeric_literal"); + subst("CUSPARSE_FILL_MODE_UPPER", "HIPSPARSE_FILL_MODE_UPPER", "numeric_literal"); + subst("CUSPARSE_FORMAT_BLOCKED_ELL", 
"HIPSPARSE_FORMAT_BLOCKED_ELL", "numeric_literal"); + subst("CUSPARSE_FORMAT_COO", "HIPSPARSE_FORMAT_COO", "numeric_literal"); + subst("CUSPARSE_FORMAT_COO_AOS", "HIPSPARSE_FORMAT_COO_AOS", "numeric_literal"); + subst("CUSPARSE_FORMAT_CSC", "HIPSPARSE_FORMAT_CSC", "numeric_literal"); + subst("CUSPARSE_FORMAT_CSR", "HIPSPARSE_FORMAT_CSR", "numeric_literal"); + subst("CUSPARSE_HYB_PARTITION_AUTO", "HIPSPARSE_HYB_PARTITION_AUTO", "numeric_literal"); + subst("CUSPARSE_HYB_PARTITION_MAX", "HIPSPARSE_HYB_PARTITION_MAX", "numeric_literal"); + subst("CUSPARSE_HYB_PARTITION_USER", "HIPSPARSE_HYB_PARTITION_USER", "numeric_literal"); + subst("CUSPARSE_INDEX_16U", "HIPSPARSE_INDEX_16U", "numeric_literal"); + subst("CUSPARSE_INDEX_32I", "HIPSPARSE_INDEX_32I", "numeric_literal"); + subst("CUSPARSE_INDEX_64I", "HIPSPARSE_INDEX_64I", "numeric_literal"); + subst("CUSPARSE_INDEX_BASE_ONE", "HIPSPARSE_INDEX_BASE_ONE", "numeric_literal"); + subst("CUSPARSE_INDEX_BASE_ZERO", "HIPSPARSE_INDEX_BASE_ZERO", "numeric_literal"); + subst("CUSPARSE_MATRIX_TYPE_GENERAL", "HIPSPARSE_MATRIX_TYPE_GENERAL", "numeric_literal"); + subst("CUSPARSE_MATRIX_TYPE_HERMITIAN", "HIPSPARSE_MATRIX_TYPE_HERMITIAN", "numeric_literal"); + subst("CUSPARSE_MATRIX_TYPE_SYMMETRIC", "HIPSPARSE_MATRIX_TYPE_SYMMETRIC", "numeric_literal"); + subst("CUSPARSE_MATRIX_TYPE_TRIANGULAR", "HIPSPARSE_MATRIX_TYPE_TRIANGULAR", "numeric_literal"); + subst("CUSPARSE_MM_ALG_DEFAULT", "HIPSPARSE_MM_ALG_DEFAULT", "numeric_literal"); + subst("CUSPARSE_MV_ALG_DEFAULT", "HIPSPARSE_MV_ALG_DEFAULT", "numeric_literal"); + subst("CUSPARSE_OPERATION_CONJUGATE_TRANSPOSE", "HIPSPARSE_OPERATION_CONJUGATE_TRANSPOSE", "numeric_literal"); + subst("CUSPARSE_OPERATION_NON_TRANSPOSE", "HIPSPARSE_OPERATION_NON_TRANSPOSE", "numeric_literal"); + subst("CUSPARSE_OPERATION_TRANSPOSE", "HIPSPARSE_OPERATION_TRANSPOSE", "numeric_literal"); + subst("CUSPARSE_ORDER_COL", "HIPSPARSE_ORDER_COL", "numeric_literal"); + subst("CUSPARSE_ORDER_ROW", "HIPSPARSE_ORDER_ROW", "numeric_literal"); + subst("CUSPARSE_POINTER_MODE_DEVICE", "HIPSPARSE_POINTER_MODE_DEVICE", "numeric_literal"); + subst("CUSPARSE_POINTER_MODE_HOST", "HIPSPARSE_POINTER_MODE_HOST", "numeric_literal"); + subst("CUSPARSE_SDDMM_ALG_DEFAULT", "HIPSPARSE_SDDMM_ALG_DEFAULT", "numeric_literal"); + subst("CUSPARSE_SOLVE_POLICY_NO_LEVEL", "HIPSPARSE_SOLVE_POLICY_NO_LEVEL", "numeric_literal"); + subst("CUSPARSE_SOLVE_POLICY_USE_LEVEL", "HIPSPARSE_SOLVE_POLICY_USE_LEVEL", "numeric_literal"); + subst("CUSPARSE_SPARSETODENSE_ALG_DEFAULT", "HIPSPARSE_SPARSETODENSE_ALG_DEFAULT", "numeric_literal"); + subst("CUSPARSE_SPGEMM_ALG1", "HIPSPARSE_SPGEMM_ALG1", "numeric_literal"); + subst("CUSPARSE_SPGEMM_ALG2", "HIPSPARSE_SPGEMM_ALG2", "numeric_literal"); + subst("CUSPARSE_SPGEMM_ALG3", "HIPSPARSE_SPGEMM_ALG3", "numeric_literal"); + subst("CUSPARSE_SPGEMM_CSR_ALG_DETERMINITIC", "HIPSPARSE_SPGEMM_CSR_ALG_DETERMINISTIC", "numeric_literal"); + subst("CUSPARSE_SPGEMM_CSR_ALG_NONDETERMINITIC", "HIPSPARSE_SPGEMM_CSR_ALG_NONDETERMINISTIC", "numeric_literal"); + subst("CUSPARSE_SPGEMM_DEFAULT", "HIPSPARSE_SPGEMM_DEFAULT", "numeric_literal"); + subst("CUSPARSE_SPMAT_DIAG_TYPE", "HIPSPARSE_SPMAT_DIAG_TYPE", "numeric_literal"); + subst("CUSPARSE_SPMAT_FILL_MODE", "HIPSPARSE_SPMAT_FILL_MODE", "numeric_literal"); + subst("CUSPARSE_SPMM_ALG_DEFAULT", "HIPSPARSE_SPMM_ALG_DEFAULT", "numeric_literal"); + subst("CUSPARSE_SPMM_BLOCKED_ELL_ALG1", "HIPSPARSE_SPMM_BLOCKED_ELL_ALG1", "numeric_literal"); + subst("CUSPARSE_SPMM_COO_ALG1", "HIPSPARSE_SPMM_COO_ALG1", 
"numeric_literal"); + subst("CUSPARSE_SPMM_COO_ALG2", "HIPSPARSE_SPMM_COO_ALG2", "numeric_literal"); + subst("CUSPARSE_SPMM_COO_ALG3", "HIPSPARSE_SPMM_COO_ALG3", "numeric_literal"); + subst("CUSPARSE_SPMM_COO_ALG4", "HIPSPARSE_SPMM_COO_ALG4", "numeric_literal"); + subst("CUSPARSE_SPMM_CSR_ALG1", "HIPSPARSE_SPMM_CSR_ALG1", "numeric_literal"); + subst("CUSPARSE_SPMM_CSR_ALG2", "HIPSPARSE_SPMM_CSR_ALG2", "numeric_literal"); + subst("CUSPARSE_SPMM_CSR_ALG3", "HIPSPARSE_SPMM_CSR_ALG3", "numeric_literal"); + subst("CUSPARSE_SPMV_ALG_DEFAULT", "HIPSPARSE_SPMV_ALG_DEFAULT", "numeric_literal"); + subst("CUSPARSE_SPMV_COO_ALG1", "HIPSPARSE_SPMV_COO_ALG1", "numeric_literal"); + subst("CUSPARSE_SPMV_COO_ALG2", "HIPSPARSE_SPMV_COO_ALG2", "numeric_literal"); + subst("CUSPARSE_SPMV_CSR_ALG1", "HIPSPARSE_SPMV_CSR_ALG1", "numeric_literal"); + subst("CUSPARSE_SPMV_CSR_ALG2", "HIPSPARSE_SPMV_CSR_ALG2", "numeric_literal"); + subst("CUSPARSE_SPSM_ALG_DEFAULT", "HIPSPARSE_SPSM_ALG_DEFAULT", "numeric_literal"); + subst("CUSPARSE_SPSV_ALG_DEFAULT", "HIPSPARSE_SPSV_ALG_DEFAULT", "numeric_literal"); + subst("CUSPARSE_STATUS_ALLOC_FAILED", "HIPSPARSE_STATUS_ALLOC_FAILED", "numeric_literal"); + subst("CUSPARSE_STATUS_ARCH_MISMATCH", "HIPSPARSE_STATUS_ARCH_MISMATCH", "numeric_literal"); + subst("CUSPARSE_STATUS_EXECUTION_FAILED", "HIPSPARSE_STATUS_EXECUTION_FAILED", "numeric_literal"); + subst("CUSPARSE_STATUS_INSUFFICIENT_RESOURCES", "HIPSPARSE_STATUS_INSUFFICIENT_RESOURCES", "numeric_literal"); + subst("CUSPARSE_STATUS_INTERNAL_ERROR", "HIPSPARSE_STATUS_INTERNAL_ERROR", "numeric_literal"); + subst("CUSPARSE_STATUS_INVALID_VALUE", "HIPSPARSE_STATUS_INVALID_VALUE", "numeric_literal"); + subst("CUSPARSE_STATUS_MAPPING_ERROR", "HIPSPARSE_STATUS_MAPPING_ERROR", "numeric_literal"); + subst("CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED", "HIPSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED", "numeric_literal"); + subst("CUSPARSE_STATUS_NOT_INITIALIZED", "HIPSPARSE_STATUS_NOT_INITIALIZED", "numeric_literal"); + subst("CUSPARSE_STATUS_NOT_SUPPORTED", "HIPSPARSE_STATUS_NOT_SUPPORTED", "numeric_literal"); + subst("CUSPARSE_STATUS_SUCCESS", "HIPSPARSE_STATUS_SUCCESS", "numeric_literal"); + subst("CUSPARSE_STATUS_ZERO_PIVOT", "HIPSPARSE_STATUS_ZERO_PIVOT", "numeric_literal"); + subst("CU_ACCESS_PROPERTY_NORMAL", "hipAccessPropertyNormal", "numeric_literal"); + subst("CU_ACCESS_PROPERTY_PERSISTING", "hipAccessPropertyPersisting", "numeric_literal"); + subst("CU_ACCESS_PROPERTY_STREAMING", "hipAccessPropertyStreaming", "numeric_literal"); + subst("CU_AD_FORMAT_FLOAT", "HIP_AD_FORMAT_FLOAT", "numeric_literal"); + subst("CU_AD_FORMAT_HALF", "HIP_AD_FORMAT_HALF", "numeric_literal"); + subst("CU_AD_FORMAT_SIGNED_INT16", "HIP_AD_FORMAT_SIGNED_INT16", "numeric_literal"); + subst("CU_AD_FORMAT_SIGNED_INT32", "HIP_AD_FORMAT_SIGNED_INT32", "numeric_literal"); + subst("CU_AD_FORMAT_SIGNED_INT8", "HIP_AD_FORMAT_SIGNED_INT8", "numeric_literal"); + subst("CU_AD_FORMAT_UNSIGNED_INT16", "HIP_AD_FORMAT_UNSIGNED_INT16", "numeric_literal"); + subst("CU_AD_FORMAT_UNSIGNED_INT32", "HIP_AD_FORMAT_UNSIGNED_INT32", "numeric_literal"); + subst("CU_AD_FORMAT_UNSIGNED_INT8", "HIP_AD_FORMAT_UNSIGNED_INT8", "numeric_literal"); + subst("CU_ARRAY_SPARSE_SUBRESOURCE_TYPE_MIPTAIL", "hipArraySparseSubresourceTypeMiptail", "numeric_literal"); + subst("CU_ARRAY_SPARSE_SUBRESOURCE_TYPE_SPARSE_LEVEL", "hipArraySparseSubresourceTypeSparseLevel", "numeric_literal"); + subst("CU_COMPUTEMODE_DEFAULT", "hipComputeModeDefault", "numeric_literal"); + subst("CU_COMPUTEMODE_EXCLUSIVE", 
"hipComputeModeExclusive", "numeric_literal"); + subst("CU_COMPUTEMODE_EXCLUSIVE_PROCESS", "hipComputeModeExclusiveProcess", "numeric_literal"); + subst("CU_COMPUTEMODE_PROHIBITED", "hipComputeModeProhibited", "numeric_literal"); + subst("CU_CTX_BLOCKING_SYNC", "hipDeviceScheduleBlockingSync", "numeric_literal"); + subst("CU_CTX_LMEM_RESIZE_TO_MAX", "hipDeviceLmemResizeToMax", "numeric_literal"); + subst("CU_CTX_MAP_HOST", "hipDeviceMapHost", "numeric_literal"); + subst("CU_CTX_SCHED_AUTO", "hipDeviceScheduleAuto", "numeric_literal"); + subst("CU_CTX_SCHED_BLOCKING_SYNC", "hipDeviceScheduleBlockingSync", "numeric_literal"); + subst("CU_CTX_SCHED_MASK", "hipDeviceScheduleMask", "numeric_literal"); + subst("CU_CTX_SCHED_SPIN", "hipDeviceScheduleSpin", "numeric_literal"); + subst("CU_CTX_SCHED_YIELD", "hipDeviceScheduleYield", "numeric_literal"); + subst("CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT", "hipDeviceAttributeAsyncEngineCount", "numeric_literal"); + subst("CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY", "hipDeviceAttributeCanMapHostMemory", "numeric_literal"); + subst("CU_DEVICE_ATTRIBUTE_CAN_USE_HOST_POINTER_FOR_REGISTERED_MEM", "hipDeviceAttributeCanUseHostPointerForRegisteredMem", "numeric_literal"); + subst("CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_WAIT_VALUE_NOR", "hipDeviceAttributeCanUseStreamWaitValue", "numeric_literal"); + subst("CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_WAIT_VALUE_NOR_V1", "hipDeviceAttributeCanUseStreamWaitValue", "numeric_literal"); + subst("CU_DEVICE_ATTRIBUTE_CLOCK_RATE", "hipDeviceAttributeClockRate", "numeric_literal"); + subst("CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR", "hipDeviceAttributeComputeCapabilityMajor", "numeric_literal"); + subst("CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR", "hipDeviceAttributeComputeCapabilityMinor", "numeric_literal"); + subst("CU_DEVICE_ATTRIBUTE_COMPUTE_MODE", "hipDeviceAttributeComputeMode", "numeric_literal"); + subst("CU_DEVICE_ATTRIBUTE_COMPUTE_PREEMPTION_SUPPORTED", "hipDeviceAttributeComputePreemptionSupported", "numeric_literal"); + subst("CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS", "hipDeviceAttributeConcurrentKernels", "numeric_literal"); + subst("CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS", "hipDeviceAttributeConcurrentManagedAccess", "numeric_literal"); + subst("CU_DEVICE_ATTRIBUTE_COOPERATIVE_LAUNCH", "hipDeviceAttributeCooperativeLaunch", "numeric_literal"); + subst("CU_DEVICE_ATTRIBUTE_COOPERATIVE_MULTI_DEVICE_LAUNCH", "hipDeviceAttributeCooperativeMultiDeviceLaunch", "numeric_literal"); + subst("CU_DEVICE_ATTRIBUTE_DIRECT_MANAGED_MEM_ACCESS_FROM_HOST", "hipDeviceAttributeDirectManagedMemAccessFromHost", "numeric_literal"); + subst("CU_DEVICE_ATTRIBUTE_ECC_ENABLED", "hipDeviceAttributeEccEnabled", "numeric_literal"); + subst("CU_DEVICE_ATTRIBUTE_GLOBAL_L1_CACHE_SUPPORTED", "hipDeviceAttributeGlobalL1CacheSupported", "numeric_literal"); + subst("CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH", "hipDeviceAttributeMemoryBusWidth", "numeric_literal"); + subst("CU_DEVICE_ATTRIBUTE_GPU_OVERLAP", "hipDeviceAttributeAsyncEngineCount", "numeric_literal"); + subst("CU_DEVICE_ATTRIBUTE_HOST_NATIVE_ATOMIC_SUPPORTED", "hipDeviceAttributeHostNativeAtomicSupported", "numeric_literal"); + subst("CU_DEVICE_ATTRIBUTE_HOST_REGISTER_SUPPORTED", "hipDeviceAttributeHostRegisterSupported", "numeric_literal"); + subst("CU_DEVICE_ATTRIBUTE_INTEGRATED", "hipDeviceAttributeIntegrated", "numeric_literal"); + subst("CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT", "hipDeviceAttributeKernelExecTimeout", "numeric_literal"); + 
subst("CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE", "hipDeviceAttributeL2CacheSize", "numeric_literal"); + subst("CU_DEVICE_ATTRIBUTE_LOCAL_L1_CACHE_SUPPORTED", "hipDeviceAttributeLocalL1CacheSupported", "numeric_literal"); + subst("CU_DEVICE_ATTRIBUTE_MANAGED_MEMORY", "hipDeviceAttributeManagedMemory", "numeric_literal"); + subst("CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_WIDTH", "hipDeviceAttributeMaxSurface1DLayered", "numeric_literal"); + subst("CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_WIDTH", "hipDeviceAttributeMaxSurface1D", "numeric_literal"); + subst("CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_HEIGHT", "hipDeviceAttributeMaxSurface2D", "numeric_literal"); + subst("CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_HEIGHT", "hipDeviceAttributeMaxSurface2DLayered", "numeric_literal"); + subst("CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_WIDTH", "hipDeviceAttributeMaxSurface2DLayered", "numeric_literal"); + subst("CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_WIDTH", "hipDeviceAttributeMaxSurface2D", "numeric_literal"); + subst("CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_DEPTH", "hipDeviceAttributeMaxSurface3D", "numeric_literal"); + subst("CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_HEIGHT", "hipDeviceAttributeMaxSurface3D", "numeric_literal"); + subst("CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_WIDTH", "hipDeviceAttributeMaxSurface3D", "numeric_literal"); + subst("CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_WIDTH", "hipDeviceAttributeMaxSurfaceCubemapLayered", "numeric_literal"); + subst("CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_WIDTH", "hipDeviceAttributeMaxSurfaceCubemap", "numeric_literal"); + subst("CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_WIDTH", "hipDeviceAttributeMaxTexture1DLayered", "numeric_literal"); + subst("CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LINEAR_WIDTH", "hipDeviceAttributeMaxTexture1DLinear", "numeric_literal"); + subst("CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_MIPMAPPED_WIDTH", "hipDeviceAttributeMaxTexture1DMipmap", "numeric_literal"); + subst("CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH", "hipDeviceAttributeMaxTexture1DWidth", "numeric_literal"); + subst("CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_HEIGHT", "hipDeviceAttributeMaxTexture2DLayered", "numeric_literal"); + subst("CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_WIDTH", "hipDeviceAttributeMaxTexture2DLayered", "numeric_literal"); + subst("CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_HEIGHT", "hipDeviceAttributeMaxTexture2DGather", "numeric_literal"); + subst("CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_WIDTH", "hipDeviceAttributeMaxTexture2DGather", "numeric_literal"); + subst("CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_HEIGHT", "hipDeviceAttributeMaxTexture2DHeight", "numeric_literal"); + subst("CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_HEIGHT", "hipDeviceAttributeMaxTexture2DLayered", "numeric_literal"); + subst("CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_WIDTH", "hipDeviceAttributeMaxTexture2DLayered", "numeric_literal"); + subst("CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_HEIGHT", "hipDeviceAttributeMaxTexture2DLinear", "numeric_literal"); + subst("CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_PITCH", "hipDeviceAttributeMaxTexture2DLinear", "numeric_literal"); + subst("CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_WIDTH", "hipDeviceAttributeMaxTexture2DLinear", "numeric_literal"); + subst("CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_HEIGHT", "hipDeviceAttributeMaxTexture2DMipmap", "numeric_literal"); + subst("CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_WIDTH", "hipDeviceAttributeMaxTexture2DMipmap", 
"numeric_literal"); + subst("CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_WIDTH", "hipDeviceAttributeMaxTexture2DWidth", "numeric_literal"); + subst("CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH", "hipDeviceAttributeMaxTexture3DDepth", "numeric_literal"); + subst("CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH_ALTERNATE", "hipDeviceAttributeMaxTexture3DAlt", "numeric_literal"); + subst("CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT", "hipDeviceAttributeMaxTexture3DHeight", "numeric_literal"); + subst("CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT_ALTERNATE", "hipDeviceAttributeMaxTexture3DAlt", "numeric_literal"); + subst("CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH", "hipDeviceAttributeMaxTexture3DWidth", "numeric_literal"); + subst("CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH_ALTERNATE", "hipDeviceAttributeMaxTexture3DAlt", "numeric_literal"); + subst("CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_WIDTH", "hipDeviceAttributeMaxTextureCubemapLayered", "numeric_literal"); + subst("CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_WIDTH", "hipDeviceAttributeMaxTextureCubemap", "numeric_literal"); + subst("CU_DEVICE_ATTRIBUTE_MAX_BLOCKS_PER_MULTIPROCESSOR", "hipDeviceAttributeMaxBlocksPerMultiprocessor", "numeric_literal"); + subst("CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X", "hipDeviceAttributeMaxBlockDimX", "numeric_literal"); + subst("CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y", "hipDeviceAttributeMaxBlockDimY", "numeric_literal"); + subst("CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z", "hipDeviceAttributeMaxBlockDimZ", "numeric_literal"); + subst("CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X", "hipDeviceAttributeMaxGridDimX", "numeric_literal"); + subst("CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y", "hipDeviceAttributeMaxGridDimY", "numeric_literal"); + subst("CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z", "hipDeviceAttributeMaxGridDimZ", "numeric_literal"); + subst("CU_DEVICE_ATTRIBUTE_MAX_PITCH", "hipDeviceAttributeMaxPitch", "numeric_literal"); + subst("CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK", "hipDeviceAttributeMaxRegistersPerBlock", "numeric_literal"); + subst("CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR", "hipDeviceAttributeMaxRegistersPerMultiprocessor", "numeric_literal"); + subst("CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK", "hipDeviceAttributeMaxSharedMemoryPerBlock", "numeric_literal"); + subst("CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN", "hipDeviceAttributeSharedMemPerBlockOptin", "numeric_literal"); + subst("CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR", "hipDeviceAttributeMaxSharedMemoryPerMultiprocessor", "numeric_literal"); + subst("CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK", "hipDeviceAttributeMaxThreadsPerBlock", "numeric_literal"); + subst("CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR", "hipDeviceAttributeMaxThreadsPerMultiProcessor", "numeric_literal"); + subst("CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE", "hipDeviceAttributeMemoryClockRate", "numeric_literal"); + subst("CU_DEVICE_ATTRIBUTE_MEMORY_POOLS_SUPPORTED", "hipDeviceAttributeMemoryPoolsSupported", "numeric_literal"); + subst("CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT", "hipDeviceAttributeMultiprocessorCount", "numeric_literal"); + subst("CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD", "hipDeviceAttributeIsMultiGpuBoard", "numeric_literal"); + subst("CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD_GROUP_ID", "hipDeviceAttributeMultiGpuBoardGroupId", "numeric_literal"); + subst("CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS", "hipDeviceAttributePageableMemoryAccess", "numeric_literal"); + 
subst("CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES", "hipDeviceAttributePageableMemoryAccessUsesHostPageTables", "numeric_literal"); + subst("CU_DEVICE_ATTRIBUTE_PCI_BUS_ID", "hipDeviceAttributePciBusId", "numeric_literal"); + subst("CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID", "hipDeviceAttributePciDeviceId", "numeric_literal"); + subst("CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID", "hipDeviceAttributePciDomainID", "numeric_literal"); + subst("CU_DEVICE_ATTRIBUTE_REGISTERS_PER_BLOCK", "hipDeviceAttributeMaxRegistersPerBlock", "numeric_literal"); + subst("CU_DEVICE_ATTRIBUTE_SHARED_MEMORY_PER_BLOCK", "hipDeviceAttributeMaxSharedMemoryPerBlock", "numeric_literal"); + subst("CU_DEVICE_ATTRIBUTE_SINGLE_TO_DOUBLE_PRECISION_PERF_RATIO", "hipDeviceAttributeSingleToDoublePrecisionPerfRatio", "numeric_literal"); + subst("CU_DEVICE_ATTRIBUTE_STREAM_PRIORITIES_SUPPORTED", "hipDeviceAttributeStreamPrioritiesSupported", "numeric_literal"); + subst("CU_DEVICE_ATTRIBUTE_SURFACE_ALIGNMENT", "hipDeviceAttributeSurfaceAlignment", "numeric_literal"); + subst("CU_DEVICE_ATTRIBUTE_TCC_DRIVER", "hipDeviceAttributeTccDriver", "numeric_literal"); + subst("CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT", "hipDeviceAttributeTextureAlignment", "numeric_literal"); + subst("CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT", "hipDeviceAttributeTexturePitchAlignment", "numeric_literal"); + subst("CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY", "hipDeviceAttributeTotalConstantMemory", "numeric_literal"); + subst("CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING", "hipDeviceAttributeUnifiedAddressing", "numeric_literal"); + subst("CU_DEVICE_ATTRIBUTE_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED", "hipDeviceAttributeVirtualMemoryManagementSupported", "numeric_literal"); + subst("CU_DEVICE_ATTRIBUTE_WARP_SIZE", "hipDeviceAttributeWarpSize", "numeric_literal"); + subst("CU_DEVICE_P2P_ATTRIBUTE_ACCESS_ACCESS_SUPPORTED", "hipDevP2PAttrHipArrayAccessSupported", "numeric_literal"); + subst("CU_DEVICE_P2P_ATTRIBUTE_ACCESS_SUPPORTED", "hipDevP2PAttrAccessSupported", "numeric_literal"); + subst("CU_DEVICE_P2P_ATTRIBUTE_ARRAY_ACCESS_ACCESS_SUPPORTED", "hipDevP2PAttrHipArrayAccessSupported", "numeric_literal"); + subst("CU_DEVICE_P2P_ATTRIBUTE_CUDA_ARRAY_ACCESS_SUPPORTED", "hipDevP2PAttrHipArrayAccessSupported", "numeric_literal"); + subst("CU_DEVICE_P2P_ATTRIBUTE_NATIVE_ATOMIC_SUPPORTED", "hipDevP2PAttrNativeAtomicSupported", "numeric_literal"); + subst("CU_DEVICE_P2P_ATTRIBUTE_PERFORMANCE_RANK", "hipDevP2PAttrPerformanceRank", "numeric_literal"); + subst("CU_EVENT_BLOCKING_SYNC", "hipEventBlockingSync", "numeric_literal"); + subst("CU_EVENT_DEFAULT", "hipEventDefault", "numeric_literal"); + subst("CU_EVENT_DISABLE_TIMING", "hipEventDisableTiming", "numeric_literal"); + subst("CU_EVENT_INTERPROCESS", "hipEventInterprocess", "numeric_literal"); + subst("CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_RESOURCE", "hipExternalMemoryHandleTypeD3D11Resource", "numeric_literal"); + subst("CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_RESOURCE_KMT", "hipExternalMemoryHandleTypeD3D11ResourceKmt", "numeric_literal"); + subst("CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_HEAP", "hipExternalMemoryHandleTypeD3D12Heap", "numeric_literal"); + subst("CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_RESOURCE", "hipExternalMemoryHandleTypeD3D12Resource", "numeric_literal"); + subst("CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD", "hipExternalMemoryHandleTypeOpaqueFd", "numeric_literal"); + subst("CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32", "hipExternalMemoryHandleTypeOpaqueWin32", "numeric_literal"); + 
subst("CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT", "hipExternalMemoryHandleTypeOpaqueWin32Kmt", "numeric_literal"); + subst("CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D12_FENCE", "hipExternalSemaphoreHandleTypeD3D12Fence", "numeric_literal"); + subst("CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD", "hipExternalSemaphoreHandleTypeOpaqueFd", "numeric_literal"); + subst("CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32", "hipExternalSemaphoreHandleTypeOpaqueWin32", "numeric_literal"); + subst("CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT", "hipExternalSemaphoreHandleTypeOpaqueWin32Kmt", "numeric_literal"); + subst("CU_FLUSH_GPU_DIRECT_RDMA_WRITES_OPTION_HOST", "hipFlushGPUDirectRDMAWritesOptionHost", "numeric_literal"); + subst("CU_FLUSH_GPU_DIRECT_RDMA_WRITES_OPTION_MEMOPS", "hipFlushGPUDirectRDMAWritesOptionMemOps", "numeric_literal"); + subst("CU_FUNC_ATTRIBUTE_BINARY_VERSION", "HIP_FUNC_ATTRIBUTE_BINARY_VERSION", "numeric_literal"); + subst("CU_FUNC_ATTRIBUTE_CACHE_MODE_CA", "HIP_FUNC_ATTRIBUTE_CACHE_MODE_CA", "numeric_literal"); + subst("CU_FUNC_ATTRIBUTE_CONST_SIZE_BYTES", "HIP_FUNC_ATTRIBUTE_CONST_SIZE_BYTES", "numeric_literal"); + subst("CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES", "HIP_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES", "numeric_literal"); + subst("CU_FUNC_ATTRIBUTE_MAX", "HIP_FUNC_ATTRIBUTE_MAX", "numeric_literal"); + subst("CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES", "HIP_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES", "numeric_literal"); + subst("CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK", "HIP_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK", "numeric_literal"); + subst("CU_FUNC_ATTRIBUTE_NUM_REGS", "HIP_FUNC_ATTRIBUTE_NUM_REGS", "numeric_literal"); + subst("CU_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT", "HIP_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT", "numeric_literal"); + subst("CU_FUNC_ATTRIBUTE_PTX_VERSION", "HIP_FUNC_ATTRIBUTE_PTX_VERSION", "numeric_literal"); + subst("CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES", "HIP_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES", "numeric_literal"); + subst("CU_FUNC_CACHE_PREFER_EQUAL", "hipFuncCachePreferEqual", "numeric_literal"); + subst("CU_FUNC_CACHE_PREFER_L1", "hipFuncCachePreferL1", "numeric_literal"); + subst("CU_FUNC_CACHE_PREFER_NONE", "hipFuncCachePreferNone", "numeric_literal"); + subst("CU_FUNC_CACHE_PREFER_SHARED", "hipFuncCachePreferShared", "numeric_literal"); + subst("CU_GL_DEVICE_LIST_ALL", "hipGLDeviceListAll", "numeric_literal"); + subst("CU_GL_DEVICE_LIST_CURRENT_FRAME", "hipGLDeviceListCurrentFrame", "numeric_literal"); + subst("CU_GL_DEVICE_LIST_NEXT_FRAME", "hipGLDeviceListNextFrame", "numeric_literal"); + subst("CU_GPU_DIRECT_RDMA_WRITES_ORDERING_ALL_DEVICES", "hipGPUDirectRDMAWritesOrderingAllDevices", "numeric_literal"); + subst("CU_GPU_DIRECT_RDMA_WRITES_ORDERING_NONE", "hipGPUDirectRDMAWritesOrderingNone", "numeric_literal"); + subst("CU_GPU_DIRECT_RDMA_WRITES_ORDERING_OWNER", "hipGPUDirectRDMAWritesOrderingOwner", "numeric_literal"); + subst("CU_GRAPHICS_REGISTER_FLAGS_NONE", "hipGraphicsRegisterFlagsNone", "numeric_literal"); + subst("CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY", "hipGraphicsRegisterFlagsReadOnly", "numeric_literal"); + subst("CU_GRAPHICS_REGISTER_FLAGS_SURFACE_LDST", "hipGraphicsRegisterFlagsSurfaceLoadStore", "numeric_literal"); + subst("CU_GRAPHICS_REGISTER_FLAGS_TEXTURE_GATHER", "hipGraphicsRegisterFlagsTextureGather", "numeric_literal"); + subst("CU_GRAPHICS_REGISTER_FLAGS_WRITE_DISCARD", "hipGraphicsRegisterFlagsWriteDiscard", "numeric_literal"); + subst("CU_GRAPH_DEBUG_DOT_FLAGS_EVENT_NODE_PARAMS", 
"hipGraphDebugDotFlagsEventNodeParams", "numeric_literal"); + subst("CU_GRAPH_DEBUG_DOT_FLAGS_EXT_SEMAS_SIGNAL_NODE_PARAMS", "hipGraphDebugDotFlagsExtSemasSignalNodeParams", "numeric_literal"); + subst("CU_GRAPH_DEBUG_DOT_FLAGS_EXT_SEMAS_WAIT_NODE_PARAMS", "hipGraphDebugDotFlagsExtSemasWaitNodeParams", "numeric_literal"); + subst("CU_GRAPH_DEBUG_DOT_FLAGS_HANDLES", "hipGraphDebugDotFlagsHandles", "numeric_literal"); + subst("CU_GRAPH_DEBUG_DOT_FLAGS_HOST_NODE_PARAMS", "hipGraphDebugDotFlagsHostNodeParams", "numeric_literal"); + subst("CU_GRAPH_DEBUG_DOT_FLAGS_KERNEL_NODE_ATTRIBUTES", "hipGraphDebugDotFlagsKernelNodeAttributes", "numeric_literal"); + subst("CU_GRAPH_DEBUG_DOT_FLAGS_KERNEL_NODE_PARAMS", "hipGraphDebugDotFlagsKernelNodeParams", "numeric_literal"); + subst("CU_GRAPH_DEBUG_DOT_FLAGS_MEMCPY_NODE_PARAMS", "hipGraphDebugDotFlagsMemcpyNodeParams", "numeric_literal"); + subst("CU_GRAPH_DEBUG_DOT_FLAGS_MEMSET_NODE_PARAMS", "hipGraphDebugDotFlagsMemsetNodeParams", "numeric_literal"); + subst("CU_GRAPH_DEBUG_DOT_FLAGS_RUNTIME_TYPES", "hipGraphDebugDotFlagsRuntimeTypes", "numeric_literal"); + subst("CU_GRAPH_DEBUG_DOT_FLAGS_VERBOSE", "hipGraphDebugDotFlagsVerbose", "numeric_literal"); + subst("CU_GRAPH_EXEC_UPDATE_ERROR", "hipGraphExecUpdateError", "numeric_literal"); + subst("CU_GRAPH_EXEC_UPDATE_ERROR_FUNCTION_CHANGED", "hipGraphExecUpdateErrorFunctionChanged", "numeric_literal"); + subst("CU_GRAPH_EXEC_UPDATE_ERROR_NODE_TYPE_CHANGED", "hipGraphExecUpdateErrorNodeTypeChanged", "numeric_literal"); + subst("CU_GRAPH_EXEC_UPDATE_ERROR_NOT_SUPPORTED", "hipGraphExecUpdateErrorNotSupported", "numeric_literal"); + subst("CU_GRAPH_EXEC_UPDATE_ERROR_PARAMETERS_CHANGED", "hipGraphExecUpdateErrorParametersChanged", "numeric_literal"); + subst("CU_GRAPH_EXEC_UPDATE_ERROR_TOPOLOGY_CHANGED", "hipGraphExecUpdateErrorTopologyChanged", "numeric_literal"); + subst("CU_GRAPH_EXEC_UPDATE_ERROR_UNSUPPORTED_FUNCTION_CHANGE", "hipGraphExecUpdateErrorUnsupportedFunctionChange", "numeric_literal"); + subst("CU_GRAPH_EXEC_UPDATE_SUCCESS", "hipGraphExecUpdateSuccess", "numeric_literal"); + subst("CU_GRAPH_MEM_ATTR_RESERVED_MEM_CURRENT", "hipGraphMemAttrReservedMemCurrent", "numeric_literal"); + subst("CU_GRAPH_MEM_ATTR_RESERVED_MEM_HIGH", "hipGraphMemAttrReservedMemHigh", "numeric_literal"); + subst("CU_GRAPH_MEM_ATTR_USED_MEM_CURRENT", "hipGraphMemAttrUsedMemCurrent", "numeric_literal"); + subst("CU_GRAPH_MEM_ATTR_USED_MEM_HIGH", "hipGraphMemAttrUsedMemHigh", "numeric_literal"); + subst("CU_GRAPH_NODE_TYPE_COUNT", "hipGraphNodeTypeCount", "numeric_literal"); + subst("CU_GRAPH_NODE_TYPE_EMPTY", "hipGraphNodeTypeEmpty", "numeric_literal"); + subst("CU_GRAPH_NODE_TYPE_EVENT_RECORD", "hipGraphNodeTypeEventRecord", "numeric_literal"); + subst("CU_GRAPH_NODE_TYPE_EXT_SEMAS_SIGNAL", "hipGraphNodeTypeExtSemaphoreSignal", "numeric_literal"); + subst("CU_GRAPH_NODE_TYPE_EXT_SEMAS_WAIT", "hipGraphNodeTypeExtSemaphoreWait", "numeric_literal"); + subst("CU_GRAPH_NODE_TYPE_GRAPH", "hipGraphNodeTypeGraph", "numeric_literal"); + subst("CU_GRAPH_NODE_TYPE_HOST", "hipGraphNodeTypeHost", "numeric_literal"); + subst("CU_GRAPH_NODE_TYPE_KERNEL", "hipGraphNodeTypeKernel", "numeric_literal"); + subst("CU_GRAPH_NODE_TYPE_MEMCPY", "hipGraphNodeTypeMemcpy", "numeric_literal"); + subst("CU_GRAPH_NODE_TYPE_MEMSET", "hipGraphNodeTypeMemset", "numeric_literal"); + subst("CU_GRAPH_NODE_TYPE_MEM_ALLOC", "hipGraphNodeTypeMemAlloc", "numeric_literal"); + subst("CU_GRAPH_NODE_TYPE_MEM_FREE", "hipGraphNodeTypeMemFree", "numeric_literal"); + 
subst("CU_GRAPH_NODE_TYPE_WAIT_EVENT", "hipGraphNodeTypeWaitEvent", "numeric_literal"); + subst("CU_GRAPH_USER_OBJECT_MOVE", "hipGraphUserObjectMove", "numeric_literal"); + subst("CU_IPC_MEM_LAZY_ENABLE_PEER_ACCESS", "hipIpcMemLazyEnablePeerAccess", "numeric_literal"); + subst("CU_JIT_CACHE_MODE", "HIPRTC_JIT_CACHE_MODE", "numeric_literal"); + subst("CU_JIT_ERROR_LOG_BUFFER", "HIPRTC_JIT_ERROR_LOG_BUFFER", "numeric_literal"); + subst("CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES", "HIPRTC_JIT_ERROR_LOG_BUFFER_SIZE_BYTES", "numeric_literal"); + subst("CU_JIT_FALLBACK_STRATEGY", "HIPRTC_JIT_FALLBACK_STRATEGY", "numeric_literal"); + subst("CU_JIT_FAST_COMPILE", "HIPRTC_JIT_FAST_COMPILE", "numeric_literal"); + subst("CU_JIT_GENERATE_DEBUG_INFO", "HIPRTC_JIT_GENERATE_DEBUG_INFO", "numeric_literal"); + subst("CU_JIT_GENERATE_LINE_INFO", "HIPRTC_JIT_GENERATE_LINE_INFO", "numeric_literal"); + subst("CU_JIT_INFO_LOG_BUFFER", "HIPRTC_JIT_INFO_LOG_BUFFER", "numeric_literal"); + subst("CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES", "HIPRTC_JIT_INFO_LOG_BUFFER_SIZE_BYTES", "numeric_literal"); + subst("CU_JIT_INPUT_CUBIN", "HIPRTC_JIT_INPUT_CUBIN", "numeric_literal"); + subst("CU_JIT_INPUT_FATBINARY", "HIPRTC_JIT_INPUT_FATBINARY", "numeric_literal"); + subst("CU_JIT_INPUT_LIBRARY", "HIPRTC_JIT_INPUT_LIBRARY", "numeric_literal"); + subst("CU_JIT_INPUT_NVVM", "HIPRTC_JIT_INPUT_NVVM", "numeric_literal"); + subst("CU_JIT_INPUT_OBJECT", "HIPRTC_JIT_INPUT_OBJECT", "numeric_literal"); + subst("CU_JIT_INPUT_PTX", "HIPRTC_JIT_INPUT_PTX", "numeric_literal"); + subst("CU_JIT_LOG_VERBOSE", "HIPRTC_JIT_LOG_VERBOSE", "numeric_literal"); + subst("CU_JIT_MAX_REGISTERS", "HIPRTC_JIT_MAX_REGISTERS", "numeric_literal"); + subst("CU_JIT_NEW_SM3X_OPT", "HIPRTC_JIT_NEW_SM3X_OPT", "numeric_literal"); + subst("CU_JIT_NUM_INPUT_TYPES", "HIPRTC_JIT_NUM_LEGACY_INPUT_TYPES", "numeric_literal"); + subst("CU_JIT_NUM_OPTIONS", "HIPRTC_JIT_NUM_OPTIONS", "numeric_literal"); + subst("CU_JIT_OPTIMIZATION_LEVEL", "HIPRTC_JIT_OPTIMIZATION_LEVEL", "numeric_literal"); + subst("CU_JIT_TARGET", "HIPRTC_JIT_TARGET", "numeric_literal"); + subst("CU_JIT_TARGET_FROM_CUCONTEXT", "HIPRTC_JIT_TARGET_FROM_HIPCONTEXT", "numeric_literal"); + subst("CU_JIT_THREADS_PER_BLOCK", "HIPRTC_JIT_THREADS_PER_BLOCK", "numeric_literal"); + subst("CU_JIT_WALL_TIME", "HIPRTC_JIT_WALL_TIME", "numeric_literal"); + subst("CU_KERNEL_NODE_ATTRIBUTE_ACCESS_POLICY_WINDOW", "hipKernelNodeAttributeAccessPolicyWindow", "numeric_literal"); + subst("CU_KERNEL_NODE_ATTRIBUTE_COOPERATIVE", "hipKernelNodeAttributeCooperative", "numeric_literal"); + subst("CU_LIMIT_MALLOC_HEAP_SIZE", "hipLimitMallocHeapSize", "numeric_literal"); + subst("CU_LIMIT_PRINTF_FIFO_SIZE", "hipLimitPrintfFifoSize", "numeric_literal"); + subst("CU_LIMIT_STACK_SIZE", "hipLimitStackSize", "numeric_literal"); + subst("CU_MEMORYTYPE_ARRAY", "hipMemoryTypeArray", "numeric_literal"); + subst("CU_MEMORYTYPE_DEVICE", "hipMemoryTypeDevice", "numeric_literal"); + subst("CU_MEMORYTYPE_HOST", "hipMemoryTypeHost", "numeric_literal"); + subst("CU_MEMORYTYPE_UNIFIED", "hipMemoryTypeUnified", "numeric_literal"); + subst("CU_MEMPOOL_ATTR_RELEASE_THRESHOLD", "hipMemPoolAttrReleaseThreshold", "numeric_literal"); + subst("CU_MEMPOOL_ATTR_RESERVED_MEM_CURRENT", "hipMemPoolAttrReservedMemCurrent", "numeric_literal"); + subst("CU_MEMPOOL_ATTR_RESERVED_MEM_HIGH", "hipMemPoolAttrReservedMemHigh", "numeric_literal"); + subst("CU_MEMPOOL_ATTR_REUSE_ALLOW_INTERNAL_DEPENDENCIES", "hipMemPoolReuseAllowInternalDependencies", "numeric_literal"); + 
subst("CU_MEMPOOL_ATTR_REUSE_ALLOW_OPPORTUNISTIC", "hipMemPoolReuseAllowOpportunistic", "numeric_literal"); + subst("CU_MEMPOOL_ATTR_REUSE_FOLLOW_EVENT_DEPENDENCIES", "hipMemPoolReuseFollowEventDependencies", "numeric_literal"); + subst("CU_MEMPOOL_ATTR_USED_MEM_CURRENT", "hipMemPoolAttrUsedMemCurrent", "numeric_literal"); + subst("CU_MEMPOOL_ATTR_USED_MEM_HIGH", "hipMemPoolAttrUsedMemHigh", "numeric_literal"); + subst("CU_MEM_ACCESS_FLAGS_PROT_NONE", "hipMemAccessFlagsProtNone", "numeric_literal"); + subst("CU_MEM_ACCESS_FLAGS_PROT_READ", "hipMemAccessFlagsProtRead", "numeric_literal"); + subst("CU_MEM_ACCESS_FLAGS_PROT_READWRITE", "hipMemAccessFlagsProtReadWrite", "numeric_literal"); + subst("CU_MEM_ADVISE_SET_ACCESSED_BY", "hipMemAdviseSetAccessedBy", "numeric_literal"); + subst("CU_MEM_ADVISE_SET_PREFERRED_LOCATION", "hipMemAdviseSetPreferredLocation", "numeric_literal"); + subst("CU_MEM_ADVISE_SET_READ_MOSTLY", "hipMemAdviseSetReadMostly", "numeric_literal"); + subst("CU_MEM_ADVISE_UNSET_ACCESSED_BY", "hipMemAdviseUnsetAccessedBy", "numeric_literal"); + subst("CU_MEM_ADVISE_UNSET_PREFERRED_LOCATION", "hipMemAdviseUnsetPreferredLocation", "numeric_literal"); + subst("CU_MEM_ADVISE_UNSET_READ_MOSTLY", "hipMemAdviseUnsetReadMostly", "numeric_literal"); + subst("CU_MEM_ALLOCATION_TYPE_INVALID", "hipMemAllocationTypeInvalid", "numeric_literal"); + subst("CU_MEM_ALLOCATION_TYPE_MAX", "hipMemAllocationTypeMax", "numeric_literal"); + subst("CU_MEM_ALLOCATION_TYPE_PINNED", "hipMemAllocationTypePinned", "numeric_literal"); + subst("CU_MEM_ALLOC_GRANULARITY_MINIMUM", "hipMemAllocationGranularityMinimum", "numeric_literal"); + subst("CU_MEM_ALLOC_GRANULARITY_RECOMMENDED", "hipMemAllocationGranularityRecommended", "numeric_literal"); + subst("CU_MEM_ATTACH_GLOBAL", "hipMemAttachGlobal", "numeric_literal"); + subst("CU_MEM_ATTACH_HOST", "hipMemAttachHost", "numeric_literal"); + subst("CU_MEM_ATTACH_SINGLE", "hipMemAttachSingle", "numeric_literal"); + subst("CU_MEM_HANDLE_TYPE_GENERIC", "hipMemHandleTypeGeneric", "numeric_literal"); + subst("CU_MEM_HANDLE_TYPE_NONE", "hipMemHandleTypeNone", "numeric_literal"); + subst("CU_MEM_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR", "hipMemHandleTypePosixFileDescriptor", "numeric_literal"); + subst("CU_MEM_HANDLE_TYPE_WIN32", "hipMemHandleTypeWin32", "numeric_literal"); + subst("CU_MEM_HANDLE_TYPE_WIN32_KMT", "hipMemHandleTypeWin32Kmt", "numeric_literal"); + subst("CU_MEM_LOCATION_TYPE_DEVICE", "hipMemLocationTypeDevice", "numeric_literal"); + subst("CU_MEM_LOCATION_TYPE_INVALID", "hipMemLocationTypeInvalid", "numeric_literal"); + subst("CU_MEM_OPERATION_TYPE_MAP", "hipMemOperationTypeMap", "numeric_literal"); + subst("CU_MEM_OPERATION_TYPE_UNMAP", "hipMemOperationTypeUnmap", "numeric_literal"); + subst("CU_MEM_RANGE_ATTRIBUTE_ACCESSED_BY", "hipMemRangeAttributeAccessedBy", "numeric_literal"); + subst("CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION", "hipMemRangeAttributeLastPrefetchLocation", "numeric_literal"); + subst("CU_MEM_RANGE_ATTRIBUTE_PREFERRED_LOCATION", "hipMemRangeAttributePreferredLocation", "numeric_literal"); + subst("CU_MEM_RANGE_ATTRIBUTE_READ_MOSTLY", "hipMemRangeAttributeReadMostly", "numeric_literal"); + subst("CU_OCCUPANCY_DEFAULT", "hipOccupancyDefault", "numeric_literal"); + subst("CU_OCCUPANCY_DISABLE_CACHING_OVERRIDE", "hipOccupancyDisableCachingOverride", "numeric_literal"); + subst("CU_POINTER_ATTRIBUTE_ACCESS_FLAGS", "HIP_POINTER_ATTRIBUTE_ACCESS_FLAGS", "numeric_literal"); + subst("CU_POINTER_ATTRIBUTE_ALLOWED_HANDLE_TYPES", 
"HIP_POINTER_ATTRIBUTE_ALLOWED_HANDLE_TYPES", "numeric_literal"); + subst("CU_POINTER_ATTRIBUTE_BUFFER_ID", "HIP_POINTER_ATTRIBUTE_BUFFER_ID", "numeric_literal"); + subst("CU_POINTER_ATTRIBUTE_CONTEXT", "HIP_POINTER_ATTRIBUTE_CONTEXT", "numeric_literal"); + subst("CU_POINTER_ATTRIBUTE_DEVICE_ORDINAL", "HIP_POINTER_ATTRIBUTE_DEVICE_ORDINAL", "numeric_literal"); + subst("CU_POINTER_ATTRIBUTE_DEVICE_POINTER", "HIP_POINTER_ATTRIBUTE_DEVICE_POINTER", "numeric_literal"); + subst("CU_POINTER_ATTRIBUTE_HOST_POINTER", "HIP_POINTER_ATTRIBUTE_HOST_POINTER", "numeric_literal"); + subst("CU_POINTER_ATTRIBUTE_IS_GPU_DIRECT_RDMA_CAPABLE", "HIP_POINTER_ATTRIBUTE_IS_GPU_DIRECT_RDMA_CAPABLE", "numeric_literal"); + subst("CU_POINTER_ATTRIBUTE_IS_LEGACY_CUDA_IPC_CAPABLE", "HIP_POINTER_ATTRIBUTE_IS_LEGACY_HIP_IPC_CAPABLE", "numeric_literal"); + subst("CU_POINTER_ATTRIBUTE_IS_MANAGED", "HIP_POINTER_ATTRIBUTE_IS_MANAGED", "numeric_literal"); + subst("CU_POINTER_ATTRIBUTE_MAPPED", "HIP_POINTER_ATTRIBUTE_MAPPED", "numeric_literal"); + subst("CU_POINTER_ATTRIBUTE_MEMORY_TYPE", "HIP_POINTER_ATTRIBUTE_MEMORY_TYPE", "numeric_literal"); + subst("CU_POINTER_ATTRIBUTE_MEMPOOL_HANDLE", "HIP_POINTER_ATTRIBUTE_MEMPOOL_HANDLE", "numeric_literal"); + subst("CU_POINTER_ATTRIBUTE_P2P_TOKENS", "HIP_POINTER_ATTRIBUTE_P2P_TOKENS", "numeric_literal"); + subst("CU_POINTER_ATTRIBUTE_RANGE_SIZE", "HIP_POINTER_ATTRIBUTE_RANGE_SIZE", "numeric_literal"); + subst("CU_POINTER_ATTRIBUTE_RANGE_START_ADDR", "HIP_POINTER_ATTRIBUTE_RANGE_START_ADDR", "numeric_literal"); + subst("CU_POINTER_ATTRIBUTE_SYNC_MEMOPS", "HIP_POINTER_ATTRIBUTE_SYNC_MEMOPS", "numeric_literal"); + subst("CU_RESOURCE_TYPE_ARRAY", "HIP_RESOURCE_TYPE_ARRAY", "numeric_literal"); + subst("CU_RESOURCE_TYPE_LINEAR", "HIP_RESOURCE_TYPE_LINEAR", "numeric_literal"); + subst("CU_RESOURCE_TYPE_MIPMAPPED_ARRAY", "HIP_RESOURCE_TYPE_MIPMAPPED_ARRAY", "numeric_literal"); + subst("CU_RESOURCE_TYPE_PITCH2D", "HIP_RESOURCE_TYPE_PITCH2D", "numeric_literal"); + subst("CU_RES_VIEW_FORMAT_FLOAT_1X16", "HIP_RES_VIEW_FORMAT_FLOAT_1X16", "numeric_literal"); + subst("CU_RES_VIEW_FORMAT_FLOAT_1X32", "HIP_RES_VIEW_FORMAT_FLOAT_1X32", "numeric_literal"); + subst("CU_RES_VIEW_FORMAT_FLOAT_2X16", "HIP_RES_VIEW_FORMAT_FLOAT_2X16", "numeric_literal"); + subst("CU_RES_VIEW_FORMAT_FLOAT_2X32", "HIP_RES_VIEW_FORMAT_FLOAT_2X32", "numeric_literal"); + subst("CU_RES_VIEW_FORMAT_FLOAT_4X16", "HIP_RES_VIEW_FORMAT_FLOAT_4X16", "numeric_literal"); + subst("CU_RES_VIEW_FORMAT_FLOAT_4X32", "HIP_RES_VIEW_FORMAT_FLOAT_4X32", "numeric_literal"); + subst("CU_RES_VIEW_FORMAT_NONE", "HIP_RES_VIEW_FORMAT_NONE", "numeric_literal"); + subst("CU_RES_VIEW_FORMAT_SIGNED_BC4", "HIP_RES_VIEW_FORMAT_SIGNED_BC4", "numeric_literal"); + subst("CU_RES_VIEW_FORMAT_SIGNED_BC5", "HIP_RES_VIEW_FORMAT_SIGNED_BC5", "numeric_literal"); + subst("CU_RES_VIEW_FORMAT_SIGNED_BC6H", "HIP_RES_VIEW_FORMAT_SIGNED_BC6H", "numeric_literal"); + subst("CU_RES_VIEW_FORMAT_SINT_1X16", "HIP_RES_VIEW_FORMAT_SINT_1X16", "numeric_literal"); + subst("CU_RES_VIEW_FORMAT_SINT_1X32", "HIP_RES_VIEW_FORMAT_SINT_1X32", "numeric_literal"); + subst("CU_RES_VIEW_FORMAT_SINT_1X8", "HIP_RES_VIEW_FORMAT_SINT_1X8", "numeric_literal"); + subst("CU_RES_VIEW_FORMAT_SINT_2X16", "HIP_RES_VIEW_FORMAT_SINT_2X16", "numeric_literal"); + subst("CU_RES_VIEW_FORMAT_SINT_2X32", "HIP_RES_VIEW_FORMAT_SINT_2X32", "numeric_literal"); + subst("CU_RES_VIEW_FORMAT_SINT_2X8", "HIP_RES_VIEW_FORMAT_SINT_2X8", "numeric_literal"); + subst("CU_RES_VIEW_FORMAT_SINT_4X16", 
"HIP_RES_VIEW_FORMAT_SINT_4X16", "numeric_literal"); + subst("CU_RES_VIEW_FORMAT_SINT_4X32", "HIP_RES_VIEW_FORMAT_SINT_4X32", "numeric_literal"); + subst("CU_RES_VIEW_FORMAT_SINT_4X8", "HIP_RES_VIEW_FORMAT_SINT_4X8", "numeric_literal"); + subst("CU_RES_VIEW_FORMAT_UINT_1X16", "HIP_RES_VIEW_FORMAT_UINT_1X16", "numeric_literal"); + subst("CU_RES_VIEW_FORMAT_UINT_1X32", "HIP_RES_VIEW_FORMAT_UINT_1X32", "numeric_literal"); + subst("CU_RES_VIEW_FORMAT_UINT_1X8", "HIP_RES_VIEW_FORMAT_UINT_1X8", "numeric_literal"); + subst("CU_RES_VIEW_FORMAT_UINT_2X16", "HIP_RES_VIEW_FORMAT_UINT_2X16", "numeric_literal"); + subst("CU_RES_VIEW_FORMAT_UINT_2X32", "HIP_RES_VIEW_FORMAT_UINT_2X32", "numeric_literal"); + subst("CU_RES_VIEW_FORMAT_UINT_2X8", "HIP_RES_VIEW_FORMAT_UINT_2X8", "numeric_literal"); + subst("CU_RES_VIEW_FORMAT_UINT_4X16", "HIP_RES_VIEW_FORMAT_UINT_4X16", "numeric_literal"); + subst("CU_RES_VIEW_FORMAT_UINT_4X32", "HIP_RES_VIEW_FORMAT_UINT_4X32", "numeric_literal"); + subst("CU_RES_VIEW_FORMAT_UINT_4X8", "HIP_RES_VIEW_FORMAT_UINT_4X8", "numeric_literal"); + subst("CU_RES_VIEW_FORMAT_UNSIGNED_BC1", "HIP_RES_VIEW_FORMAT_UNSIGNED_BC1", "numeric_literal"); + subst("CU_RES_VIEW_FORMAT_UNSIGNED_BC2", "HIP_RES_VIEW_FORMAT_UNSIGNED_BC2", "numeric_literal"); + subst("CU_RES_VIEW_FORMAT_UNSIGNED_BC3", "HIP_RES_VIEW_FORMAT_UNSIGNED_BC3", "numeric_literal"); + subst("CU_RES_VIEW_FORMAT_UNSIGNED_BC4", "HIP_RES_VIEW_FORMAT_UNSIGNED_BC4", "numeric_literal"); + subst("CU_RES_VIEW_FORMAT_UNSIGNED_BC5", "HIP_RES_VIEW_FORMAT_UNSIGNED_BC5", "numeric_literal"); + subst("CU_RES_VIEW_FORMAT_UNSIGNED_BC6H", "HIP_RES_VIEW_FORMAT_UNSIGNED_BC6H", "numeric_literal"); + subst("CU_RES_VIEW_FORMAT_UNSIGNED_BC7", "HIP_RES_VIEW_FORMAT_UNSIGNED_BC7", "numeric_literal"); + subst("CU_SHARED_MEM_CONFIG_DEFAULT_BANK_SIZE", "hipSharedMemBankSizeDefault", "numeric_literal"); + subst("CU_SHARED_MEM_CONFIG_EIGHT_BYTE_BANK_SIZE", "hipSharedMemBankSizeEightByte", "numeric_literal"); + subst("CU_SHARED_MEM_CONFIG_FOUR_BYTE_BANK_SIZE", "hipSharedMemBankSizeFourByte", "numeric_literal"); + subst("CU_STREAM_ADD_CAPTURE_DEPENDENCIES", "hipStreamAddCaptureDependencies", "numeric_literal"); + subst("CU_STREAM_CAPTURE_MODE_GLOBAL", "hipStreamCaptureModeGlobal", "numeric_literal"); + subst("CU_STREAM_CAPTURE_MODE_RELAXED", "hipStreamCaptureModeRelaxed", "numeric_literal"); + subst("CU_STREAM_CAPTURE_MODE_THREAD_LOCAL", "hipStreamCaptureModeThreadLocal", "numeric_literal"); + subst("CU_STREAM_CAPTURE_STATUS_ACTIVE", "hipStreamCaptureStatusActive", "numeric_literal"); + subst("CU_STREAM_CAPTURE_STATUS_INVALIDATED", "hipStreamCaptureStatusInvalidated", "numeric_literal"); + subst("CU_STREAM_CAPTURE_STATUS_NONE", "hipStreamCaptureStatusNone", "numeric_literal"); + subst("CU_STREAM_DEFAULT", "hipStreamDefault", "numeric_literal"); + subst("CU_STREAM_NON_BLOCKING", "hipStreamNonBlocking", "numeric_literal"); + subst("CU_STREAM_SET_CAPTURE_DEPENDENCIES", "hipStreamSetCaptureDependencies", "numeric_literal"); + subst("CU_STREAM_WAIT_VALUE_AND", "hipStreamWaitValueAnd", "numeric_literal"); + subst("CU_STREAM_WAIT_VALUE_EQ", "hipStreamWaitValueEq", "numeric_literal"); + subst("CU_STREAM_WAIT_VALUE_GEQ", "hipStreamWaitValueGte", "numeric_literal"); + subst("CU_STREAM_WAIT_VALUE_NOR", "hipStreamWaitValueNor", "numeric_literal"); + subst("CU_TR_ADDRESS_MODE_BORDER", "HIP_TR_ADDRESS_MODE_BORDER", "numeric_literal"); + subst("CU_TR_ADDRESS_MODE_CLAMP", "HIP_TR_ADDRESS_MODE_CLAMP", "numeric_literal"); + subst("CU_TR_ADDRESS_MODE_MIRROR", 
"HIP_TR_ADDRESS_MODE_MIRROR", "numeric_literal"); + subst("CU_TR_ADDRESS_MODE_WRAP", "HIP_TR_ADDRESS_MODE_WRAP", "numeric_literal"); + subst("CU_TR_FILTER_MODE_LINEAR", "HIP_TR_FILTER_MODE_LINEAR", "numeric_literal"); + subst("CU_TR_FILTER_MODE_POINT", "HIP_TR_FILTER_MODE_POINT", "numeric_literal"); + subst("CU_USER_OBJECT_NO_DESTRUCTOR_SYNC", "hipUserObjectNoDestructorSync", "numeric_literal"); + subst("NVRTC_ERROR_BUILTIN_OPERATION_FAILURE", "HIPRTC_ERROR_BUILTIN_OPERATION_FAILURE", "numeric_literal"); + subst("NVRTC_ERROR_COMPILATION", "HIPRTC_ERROR_COMPILATION", "numeric_literal"); + subst("NVRTC_ERROR_INTERNAL_ERROR", "HIPRTC_ERROR_INTERNAL_ERROR", "numeric_literal"); + subst("NVRTC_ERROR_INVALID_INPUT", "HIPRTC_ERROR_INVALID_INPUT", "numeric_literal"); + subst("NVRTC_ERROR_INVALID_OPTION", "HIPRTC_ERROR_INVALID_OPTION", "numeric_literal"); + subst("NVRTC_ERROR_INVALID_PROGRAM", "HIPRTC_ERROR_INVALID_PROGRAM", "numeric_literal"); + subst("NVRTC_ERROR_NAME_EXPRESSION_NOT_VALID", "HIPRTC_ERROR_NAME_EXPRESSION_NOT_VALID", "numeric_literal"); + subst("NVRTC_ERROR_NO_LOWERED_NAMES_BEFORE_COMPILATION", "HIPRTC_ERROR_NO_LOWERED_NAMES_BEFORE_COMPILATION", "numeric_literal"); + subst("NVRTC_ERROR_NO_NAME_EXPRESSIONS_AFTER_COMPILATION", "HIPRTC_ERROR_NO_NAME_EXPRESSIONS_AFTER_COMPILATION", "numeric_literal"); + subst("NVRTC_ERROR_OUT_OF_MEMORY", "HIPRTC_ERROR_OUT_OF_MEMORY", "numeric_literal"); + subst("NVRTC_ERROR_PROGRAM_CREATION_FAILURE", "HIPRTC_ERROR_PROGRAM_CREATION_FAILURE", "numeric_literal"); + subst("NVRTC_SUCCESS", "HIPRTC_SUCCESS", "numeric_literal"); + subst("cublasLtOrder_t", "hipblasLtOrder_t", "numeric_literal"); + subst("cudaAccessPropertyNormal", "hipAccessPropertyNormal", "numeric_literal"); + subst("cudaAccessPropertyPersisting", "hipAccessPropertyPersisting", "numeric_literal"); + subst("cudaAccessPropertyStreaming", "hipAccessPropertyStreaming", "numeric_literal"); + subst("cudaAddressModeBorder", "hipAddressModeBorder", "numeric_literal"); + subst("cudaAddressModeClamp", "hipAddressModeClamp", "numeric_literal"); + subst("cudaAddressModeMirror", "hipAddressModeMirror", "numeric_literal"); + subst("cudaAddressModeWrap", "hipAddressModeWrap", "numeric_literal"); + subst("cudaBoundaryModeClamp", "hipBoundaryModeClamp", "numeric_literal"); + subst("cudaBoundaryModeTrap", "hipBoundaryModeTrap", "numeric_literal"); + subst("cudaBoundaryModeZero", "hipBoundaryModeZero", "numeric_literal"); + subst("cudaChannelFormatKindFloat", "hipChannelFormatKindFloat", "numeric_literal"); + subst("cudaChannelFormatKindNone", "hipChannelFormatKindNone", "numeric_literal"); + subst("cudaChannelFormatKindSigned", "hipChannelFormatKindSigned", "numeric_literal"); + subst("cudaChannelFormatKindUnsigned", "hipChannelFormatKindUnsigned", "numeric_literal"); + subst("cudaComputeModeDefault", "hipComputeModeDefault", "numeric_literal"); + subst("cudaComputeModeExclusive", "hipComputeModeExclusive", "numeric_literal"); + subst("cudaComputeModeExclusiveProcess", "hipComputeModeExclusiveProcess", "numeric_literal"); + subst("cudaComputeModeProhibited", "hipComputeModeProhibited", "numeric_literal"); + subst("cudaDevAttrAsyncEngineCount", "hipDeviceAttributeAsyncEngineCount", "numeric_literal"); + subst("cudaDevAttrCanMapHostMemory", "hipDeviceAttributeCanMapHostMemory", "numeric_literal"); + subst("cudaDevAttrCanUseHostPointerForRegisteredMem", "hipDeviceAttributeCanUseHostPointerForRegisteredMem", "numeric_literal"); + subst("cudaDevAttrClockRate", "hipDeviceAttributeClockRate", "numeric_literal"); + 
subst("cudaDevAttrComputeCapabilityMajor", "hipDeviceAttributeComputeCapabilityMajor", "numeric_literal"); + subst("cudaDevAttrComputeCapabilityMinor", "hipDeviceAttributeComputeCapabilityMinor", "numeric_literal"); + subst("cudaDevAttrComputeMode", "hipDeviceAttributeComputeMode", "numeric_literal"); + subst("cudaDevAttrComputePreemptionSupported", "hipDeviceAttributeComputePreemptionSupported", "numeric_literal"); + subst("cudaDevAttrConcurrentKernels", "hipDeviceAttributeConcurrentKernels", "numeric_literal"); + subst("cudaDevAttrConcurrentManagedAccess", "hipDeviceAttributeConcurrentManagedAccess", "numeric_literal"); + subst("cudaDevAttrCooperativeLaunch", "hipDeviceAttributeCooperativeLaunch", "numeric_literal"); + subst("cudaDevAttrCooperativeMultiDeviceLaunch", "hipDeviceAttributeCooperativeMultiDeviceLaunch", "numeric_literal"); + subst("cudaDevAttrDirectManagedMemAccessFromHost", "hipDeviceAttributeDirectManagedMemAccessFromHost", "numeric_literal"); + subst("cudaDevAttrEccEnabled", "hipDeviceAttributeEccEnabled", "numeric_literal"); + subst("cudaDevAttrGlobalL1CacheSupported", "hipDeviceAttributeGlobalL1CacheSupported", "numeric_literal"); + subst("cudaDevAttrGlobalMemoryBusWidth", "hipDeviceAttributeMemoryBusWidth", "numeric_literal"); + subst("cudaDevAttrGpuOverlap", "hipDeviceAttributeAsyncEngineCount", "numeric_literal"); + subst("cudaDevAttrHostNativeAtomicSupported", "hipDeviceAttributeHostNativeAtomicSupported", "numeric_literal"); + subst("cudaDevAttrHostRegisterSupported", "hipDeviceAttributeHostRegisterSupported", "numeric_literal"); + subst("cudaDevAttrIntegrated", "hipDeviceAttributeIntegrated", "numeric_literal"); + subst("cudaDevAttrIsMultiGpuBoard", "hipDeviceAttributeIsMultiGpuBoard", "numeric_literal"); + subst("cudaDevAttrKernelExecTimeout", "hipDeviceAttributeKernelExecTimeout", "numeric_literal"); + subst("cudaDevAttrL2CacheSize", "hipDeviceAttributeL2CacheSize", "numeric_literal"); + subst("cudaDevAttrLocalL1CacheSupported", "hipDeviceAttributeLocalL1CacheSupported", "numeric_literal"); + subst("cudaDevAttrManagedMemory", "hipDeviceAttributeManagedMemory", "numeric_literal"); + subst("cudaDevAttrMaxBlockDimX", "hipDeviceAttributeMaxBlockDimX", "numeric_literal"); + subst("cudaDevAttrMaxBlockDimY", "hipDeviceAttributeMaxBlockDimY", "numeric_literal"); + subst("cudaDevAttrMaxBlockDimZ", "hipDeviceAttributeMaxBlockDimZ", "numeric_literal"); + subst("cudaDevAttrMaxBlocksPerMultiprocessor", "hipDeviceAttributeMaxBlocksPerMultiprocessor", "numeric_literal"); + subst("cudaDevAttrMaxGridDimX", "hipDeviceAttributeMaxGridDimX", "numeric_literal"); + subst("cudaDevAttrMaxGridDimY", "hipDeviceAttributeMaxGridDimY", "numeric_literal"); + subst("cudaDevAttrMaxGridDimZ", "hipDeviceAttributeMaxGridDimZ", "numeric_literal"); + subst("cudaDevAttrMaxPitch", "hipDeviceAttributeMaxPitch", "numeric_literal"); + subst("cudaDevAttrMaxRegistersPerBlock", "hipDeviceAttributeMaxRegistersPerBlock", "numeric_literal"); + subst("cudaDevAttrMaxRegistersPerMultiprocessor", "hipDeviceAttributeMaxRegistersPerMultiprocessor", "numeric_literal"); + subst("cudaDevAttrMaxSharedMemoryPerBlock", "hipDeviceAttributeMaxSharedMemoryPerBlock", "numeric_literal"); + subst("cudaDevAttrMaxSharedMemoryPerBlockOptin", "hipDeviceAttributeSharedMemPerBlockOptin", "numeric_literal"); + subst("cudaDevAttrMaxSharedMemoryPerMultiprocessor", "hipDeviceAttributeMaxSharedMemoryPerMultiprocessor", "numeric_literal"); + subst("cudaDevAttrMaxSurface1DLayeredWidth", "hipDeviceAttributeMaxSurface1DLayered", 
"numeric_literal"); + subst("cudaDevAttrMaxSurface1DWidth", "hipDeviceAttributeMaxSurface1D", "numeric_literal"); + subst("cudaDevAttrMaxSurface2DHeight", "hipDeviceAttributeMaxSurface2D", "numeric_literal"); + subst("cudaDevAttrMaxSurface2DLayeredHeight", "hipDeviceAttributeMaxSurface2DLayered", "numeric_literal"); + subst("cudaDevAttrMaxSurface2DLayeredWidth", "hipDeviceAttributeMaxSurface2DLayered", "numeric_literal"); + subst("cudaDevAttrMaxSurface2DWidth", "hipDeviceAttributeMaxSurface2D", "numeric_literal"); + subst("cudaDevAttrMaxSurface3DDepth", "hipDeviceAttributeMaxSurface3D", "numeric_literal"); + subst("cudaDevAttrMaxSurface3DHeight", "hipDeviceAttributeMaxSurface3D", "numeric_literal"); + subst("cudaDevAttrMaxSurface3DWidth", "hipDeviceAttributeMaxSurface3D", "numeric_literal"); + subst("cudaDevAttrMaxSurfaceCubemapLayeredWidth", "hipDeviceAttributeMaxSurfaceCubemapLayered", "numeric_literal"); + subst("cudaDevAttrMaxSurfaceCubemapWidth", "hipDeviceAttributeMaxSurfaceCubemap", "numeric_literal"); + subst("cudaDevAttrMaxTexture1DLayeredWidth", "hipDeviceAttributeMaxTexture1DLayered", "numeric_literal"); + subst("cudaDevAttrMaxTexture1DLinearWidth", "hipDeviceAttributeMaxTexture1DLinear", "numeric_literal"); + subst("cudaDevAttrMaxTexture1DMipmappedWidth", "hipDeviceAttributeMaxTexture1DMipmap", "numeric_literal"); + subst("cudaDevAttrMaxTexture1DWidth", "hipDeviceAttributeMaxTexture1DWidth", "numeric_literal"); + subst("cudaDevAttrMaxTexture2DGatherHeight", "hipDeviceAttributeMaxTexture2DGather", "numeric_literal"); + subst("cudaDevAttrMaxTexture2DGatherWidth", "hipDeviceAttributeMaxTexture2DGather", "numeric_literal"); + subst("cudaDevAttrMaxTexture2DHeight", "hipDeviceAttributeMaxTexture2DHeight", "numeric_literal"); + subst("cudaDevAttrMaxTexture2DLayeredHeight", "hipDeviceAttributeMaxTexture2DLayered", "numeric_literal"); + subst("cudaDevAttrMaxTexture2DLayeredWidth", "hipDeviceAttributeMaxTexture2DLayered", "numeric_literal"); + subst("cudaDevAttrMaxTexture2DLinearHeight", "hipDeviceAttributeMaxTexture2DLinear", "numeric_literal"); + subst("cudaDevAttrMaxTexture2DLinearPitch", "hipDeviceAttributeMaxTexture2DLinear", "numeric_literal"); + subst("cudaDevAttrMaxTexture2DLinearWidth", "hipDeviceAttributeMaxTexture2DLinear", "numeric_literal"); + subst("cudaDevAttrMaxTexture2DMipmappedHeight", "hipDeviceAttributeMaxTexture2DMipmap", "numeric_literal"); + subst("cudaDevAttrMaxTexture2DMipmappedWidth", "hipDeviceAttributeMaxTexture2DMipmap", "numeric_literal"); + subst("cudaDevAttrMaxTexture2DWidth", "hipDeviceAttributeMaxTexture2DWidth", "numeric_literal"); + subst("cudaDevAttrMaxTexture3DDepth", "hipDeviceAttributeMaxTexture3DDepth", "numeric_literal"); + subst("cudaDevAttrMaxTexture3DDepthAlt", "hipDeviceAttributeMaxTexture3DAlt", "numeric_literal"); + subst("cudaDevAttrMaxTexture3DHeight", "hipDeviceAttributeMaxTexture3DHeight", "numeric_literal"); + subst("cudaDevAttrMaxTexture3DHeightAlt", "hipDeviceAttributeMaxTexture3DAlt", "numeric_literal"); + subst("cudaDevAttrMaxTexture3DWidth", "hipDeviceAttributeMaxTexture3DWidth", "numeric_literal"); + subst("cudaDevAttrMaxTexture3DWidthAlt", "hipDeviceAttributeMaxTexture3DAlt", "numeric_literal"); + subst("cudaDevAttrMaxTextureCubemapLayeredWidth", "hipDeviceAttributeMaxTextureCubemapLayered", "numeric_literal"); + subst("cudaDevAttrMaxTextureCubemapWidth", "hipDeviceAttributeMaxTextureCubemap", "numeric_literal"); + subst("cudaDevAttrMaxThreadsPerBlock", "hipDeviceAttributeMaxThreadsPerBlock", "numeric_literal"); + 
subst("cudaDevAttrMaxThreadsPerMultiProcessor", "hipDeviceAttributeMaxThreadsPerMultiProcessor", "numeric_literal"); + subst("cudaDevAttrMemoryClockRate", "hipDeviceAttributeMemoryClockRate", "numeric_literal"); + subst("cudaDevAttrMemoryPoolsSupported", "hipDeviceAttributeMemoryPoolsSupported", "numeric_literal"); + subst("cudaDevAttrMultiGpuBoardGroupID", "hipDeviceAttributeMultiGpuBoardGroupID", "numeric_literal"); + subst("cudaDevAttrMultiProcessorCount", "hipDeviceAttributeMultiprocessorCount", "numeric_literal"); + subst("cudaDevAttrPageableMemoryAccess", "hipDeviceAttributePageableMemoryAccess", "numeric_literal"); + subst("cudaDevAttrPageableMemoryAccessUsesHostPageTables", "hipDeviceAttributePageableMemoryAccessUsesHostPageTables", "numeric_literal"); + subst("cudaDevAttrPciBusId", "hipDeviceAttributePciBusId", "numeric_literal"); + subst("cudaDevAttrPciDeviceId", "hipDeviceAttributePciDeviceId", "numeric_literal"); + subst("cudaDevAttrPciDomainId", "hipDeviceAttributePciDomainID", "numeric_literal"); + subst("cudaDevAttrReserved94", "hipDeviceAttributeCanUseStreamWaitValue", "numeric_literal"); + subst("cudaDevAttrSingleToDoublePrecisionPerfRatio", "hipDeviceAttributeSingleToDoublePrecisionPerfRatio", "numeric_literal"); + subst("cudaDevAttrStreamPrioritiesSupported", "hipDeviceAttributeStreamPrioritiesSupported", "numeric_literal"); + subst("cudaDevAttrSurfaceAlignment", "hipDeviceAttributeSurfaceAlignment", "numeric_literal"); + subst("cudaDevAttrTccDriver", "hipDeviceAttributeTccDriver", "numeric_literal"); + subst("cudaDevAttrTextureAlignment", "hipDeviceAttributeTextureAlignment", "numeric_literal"); + subst("cudaDevAttrTexturePitchAlignment", "hipDeviceAttributeTexturePitchAlignment", "numeric_literal"); + subst("cudaDevAttrTotalConstantMemory", "hipDeviceAttributeTotalConstantMemory", "numeric_literal"); + subst("cudaDevAttrUnifiedAddressing", "hipDeviceAttributeUnifiedAddressing", "numeric_literal"); + subst("cudaDevAttrWarpSize", "hipDeviceAttributeWarpSize", "numeric_literal"); + subst("cudaDevP2PAttrAccessSupported", "hipDevP2PAttrAccessSupported", "numeric_literal"); + subst("cudaDevP2PAttrCudaArrayAccessSupported", "hipDevP2PAttrHipArrayAccessSupported", "numeric_literal"); + subst("cudaDevP2PAttrNativeAtomicSupported", "hipDevP2PAttrNativeAtomicSupported", "numeric_literal"); + subst("cudaDevP2PAttrPerformanceRank", "hipDevP2PAttrPerformanceRank", "numeric_literal"); + subst("cudaErrorAlreadyAcquired", "hipErrorAlreadyAcquired", "numeric_literal"); + subst("cudaErrorAlreadyMapped", "hipErrorAlreadyMapped", "numeric_literal"); + subst("cudaErrorArrayIsMapped", "hipErrorArrayIsMapped", "numeric_literal"); + subst("cudaErrorAssert", "hipErrorAssert", "numeric_literal"); + subst("cudaErrorCapturedEvent", "hipErrorCapturedEvent", "numeric_literal"); + subst("cudaErrorContextIsDestroyed", "hipErrorContextIsDestroyed", "numeric_literal"); + subst("cudaErrorCooperativeLaunchTooLarge", "hipErrorCooperativeLaunchTooLarge", "numeric_literal"); + subst("cudaErrorCudartUnloading", "hipErrorDeinitialized", "numeric_literal"); + subst("cudaErrorDeviceAlreadyInUse", "hipErrorContextAlreadyInUse", "numeric_literal"); + subst("cudaErrorDeviceUninitialized", "hipErrorInvalidContext", "numeric_literal"); + subst("cudaErrorECCUncorrectable", "hipErrorECCNotCorrectable", "numeric_literal"); + subst("cudaErrorFileNotFound", "hipErrorFileNotFound", "numeric_literal"); + subst("cudaErrorGraphExecUpdateFailure", "hipErrorGraphExecUpdateFailure", "numeric_literal"); + 
subst("cudaErrorHostMemoryAlreadyRegistered", "hipErrorHostMemoryAlreadyRegistered", "numeric_literal"); + subst("cudaErrorHostMemoryNotRegistered", "hipErrorHostMemoryNotRegistered", "numeric_literal"); + subst("cudaErrorIllegalAddress", "hipErrorIllegalAddress", "numeric_literal"); + subst("cudaErrorIllegalState", "hipErrorIllegalState", "numeric_literal"); + subst("cudaErrorInitializationError", "hipErrorNotInitialized", "numeric_literal"); + subst("cudaErrorInsufficientDriver", "hipErrorInsufficientDriver", "numeric_literal"); + subst("cudaErrorInvalidConfiguration", "hipErrorInvalidConfiguration", "numeric_literal"); + subst("cudaErrorInvalidDevice", "hipErrorInvalidDevice", "numeric_literal"); + subst("cudaErrorInvalidDeviceFunction", "hipErrorInvalidDeviceFunction", "numeric_literal"); + subst("cudaErrorInvalidDevicePointer", "hipErrorInvalidDevicePointer", "numeric_literal"); + subst("cudaErrorInvalidGraphicsContext", "hipErrorInvalidGraphicsContext", "numeric_literal"); + subst("cudaErrorInvalidKernelImage", "hipErrorInvalidImage", "numeric_literal"); + subst("cudaErrorInvalidMemcpyDirection", "hipErrorInvalidMemcpyDirection", "numeric_literal"); + subst("cudaErrorInvalidPitchValue", "hipErrorInvalidPitchValue", "numeric_literal"); + subst("cudaErrorInvalidPtx", "hipErrorInvalidKernelFile", "numeric_literal"); + subst("cudaErrorInvalidResourceHandle", "hipErrorInvalidHandle", "numeric_literal"); + subst("cudaErrorInvalidSource", "hipErrorInvalidSource", "numeric_literal"); + subst("cudaErrorInvalidSymbol", "hipErrorInvalidSymbol", "numeric_literal"); + subst("cudaErrorInvalidValue", "hipErrorInvalidValue", "numeric_literal"); + subst("cudaErrorLaunchFailure", "hipErrorLaunchFailure", "numeric_literal"); + subst("cudaErrorLaunchOutOfResources", "hipErrorLaunchOutOfResources", "numeric_literal"); + subst("cudaErrorLaunchTimeout", "hipErrorLaunchTimeOut", "numeric_literal"); + subst("cudaErrorLossyQuery", "hipErrorLossyQuery", "numeric_literal"); + subst("cudaErrorMapBufferObjectFailed", "hipErrorMapFailed", "numeric_literal"); + subst("cudaErrorMemoryAllocation", "hipErrorOutOfMemory", "numeric_literal"); + subst("cudaErrorMissingConfiguration", "hipErrorMissingConfiguration", "numeric_literal"); + subst("cudaErrorNoDevice", "hipErrorNoDevice", "numeric_literal"); + subst("cudaErrorNoKernelImageForDevice", "hipErrorNoBinaryForGpu", "numeric_literal"); + subst("cudaErrorNotMapped", "hipErrorNotMapped", "numeric_literal"); + subst("cudaErrorNotMappedAsArray", "hipErrorNotMappedAsArray", "numeric_literal"); + subst("cudaErrorNotMappedAsPointer", "hipErrorNotMappedAsPointer", "numeric_literal"); + subst("cudaErrorNotReady", "hipErrorNotReady", "numeric_literal"); + subst("cudaErrorNotSupported", "hipErrorNotSupported", "numeric_literal"); + subst("cudaErrorOperatingSystem", "hipErrorOperatingSystem", "numeric_literal"); + subst("cudaErrorPeerAccessAlreadyEnabled", "hipErrorPeerAccessAlreadyEnabled", "numeric_literal"); + subst("cudaErrorPeerAccessNotEnabled", "hipErrorPeerAccessNotEnabled", "numeric_literal"); + subst("cudaErrorPeerAccessUnsupported", "hipErrorPeerAccessUnsupported", "numeric_literal"); + subst("cudaErrorPriorLaunchFailure", "hipErrorPriorLaunchFailure", "numeric_literal"); + subst("cudaErrorProfilerAlreadyStarted", "hipErrorProfilerAlreadyStarted", "numeric_literal"); + subst("cudaErrorProfilerAlreadyStopped", "hipErrorProfilerAlreadyStopped", "numeric_literal"); + subst("cudaErrorProfilerDisabled", "hipErrorProfilerDisabled", "numeric_literal"); + 
subst("cudaErrorProfilerNotInitialized", "hipErrorProfilerNotInitialized", "numeric_literal"); + subst("cudaErrorSetOnActiveProcess", "hipErrorSetOnActiveProcess", "numeric_literal"); + subst("cudaErrorSharedObjectInitFailed", "hipErrorSharedObjectInitFailed", "numeric_literal"); + subst("cudaErrorSharedObjectSymbolNotFound", "hipErrorSharedObjectSymbolNotFound", "numeric_literal"); + subst("cudaErrorStreamCaptureImplicit", "hipErrorStreamCaptureImplicit", "numeric_literal"); + subst("cudaErrorStreamCaptureInvalidated", "hipErrorStreamCaptureInvalidated", "numeric_literal"); + subst("cudaErrorStreamCaptureIsolation", "hipErrorStreamCaptureIsolation", "numeric_literal"); + subst("cudaErrorStreamCaptureMerge", "hipErrorStreamCaptureMerge", "numeric_literal"); + subst("cudaErrorStreamCaptureUnjoined", "hipErrorStreamCaptureUnjoined", "numeric_literal"); + subst("cudaErrorStreamCaptureUnmatched", "hipErrorStreamCaptureUnmatched", "numeric_literal"); + subst("cudaErrorStreamCaptureUnsupported", "hipErrorStreamCaptureUnsupported", "numeric_literal"); + subst("cudaErrorStreamCaptureWrongThread", "hipErrorStreamCaptureWrongThread", "numeric_literal"); + subst("cudaErrorSymbolNotFound", "hipErrorNotFound", "numeric_literal"); + subst("cudaErrorUnknown", "hipErrorUnknown", "numeric_literal"); + subst("cudaErrorUnmapBufferObjectFailed", "hipErrorUnmapFailed", "numeric_literal"); + subst("cudaErrorUnsupportedLimit", "hipErrorUnsupportedLimit", "numeric_literal"); + subst("cudaExternalMemoryHandleTypeD3D11Resource", "hipExternalMemoryHandleTypeD3D11Resource", "numeric_literal"); + subst("cudaExternalMemoryHandleTypeD3D11ResourceKmt", "hipExternalMemoryHandleTypeD3D11ResourceKmt", "numeric_literal"); + subst("cudaExternalMemoryHandleTypeD3D12Heap", "hipExternalMemoryHandleTypeD3D12Heap", "numeric_literal"); + subst("cudaExternalMemoryHandleTypeD3D12Resource", "hipExternalMemoryHandleTypeD3D12Resource", "numeric_literal"); + subst("cudaExternalMemoryHandleTypeOpaqueFd", "hipExternalMemoryHandleTypeOpaqueFd", "numeric_literal"); + subst("cudaExternalMemoryHandleTypeOpaqueWin32", "hipExternalMemoryHandleTypeOpaqueWin32", "numeric_literal"); + subst("cudaExternalMemoryHandleTypeOpaqueWin32Kmt", "hipExternalMemoryHandleTypeOpaqueWin32Kmt", "numeric_literal"); + subst("cudaExternalSemaphoreHandleTypeD3D12Fence", "hipExternalSemaphoreHandleTypeD3D12Fence", "numeric_literal"); + subst("cudaExternalSemaphoreHandleTypeOpaqueFd", "hipExternalSemaphoreHandleTypeOpaqueFd", "numeric_literal"); + subst("cudaExternalSemaphoreHandleTypeOpaqueWin32", "hipExternalSemaphoreHandleTypeOpaqueWin32", "numeric_literal"); + subst("cudaExternalSemaphoreHandleTypeOpaqueWin32Kmt", "hipExternalSemaphoreHandleTypeOpaqueWin32Kmt", "numeric_literal"); + subst("cudaFilterModeLinear", "hipFilterModeLinear", "numeric_literal"); + subst("cudaFilterModePoint", "hipFilterModePoint", "numeric_literal"); + subst("cudaFlushGPUDirectRDMAWritesOptionHost", "hipFlushGPUDirectRDMAWritesOptionHost", "numeric_literal"); + subst("cudaFlushGPUDirectRDMAWritesOptionMemOps", "hipFlushGPUDirectRDMAWritesOptionMemOps", "numeric_literal"); + subst("cudaFuncAttributeMax", "hipFuncAttributeMax", "numeric_literal"); + subst("cudaFuncAttributeMaxDynamicSharedMemorySize", "hipFuncAttributeMaxDynamicSharedMemorySize", "numeric_literal"); + subst("cudaFuncAttributePreferredSharedMemoryCarveout", "hipFuncAttributePreferredSharedMemoryCarveout", "numeric_literal"); + subst("cudaFuncCachePreferEqual", "hipFuncCachePreferEqual", "numeric_literal"); + 
subst("cudaFuncCachePreferL1", "hipFuncCachePreferL1", "numeric_literal"); + subst("cudaFuncCachePreferNone", "hipFuncCachePreferNone", "numeric_literal"); + subst("cudaFuncCachePreferShared", "hipFuncCachePreferShared", "numeric_literal"); + subst("cudaGLDeviceListAll", "hipGLDeviceListAll", "numeric_literal"); + subst("cudaGLDeviceListCurrentFrame", "hipGLDeviceListCurrentFrame", "numeric_literal"); + subst("cudaGLDeviceListNextFrame", "hipGLDeviceListNextFrame", "numeric_literal"); + subst("cudaGPUDirectRDMAWritesOrderingAllDevices", "hipGPUDirectRDMAWritesOrderingAllDevices", "numeric_literal"); + subst("cudaGPUDirectRDMAWritesOrderingNone", "hipGPUDirectRDMAWritesOrderingNone", "numeric_literal"); + subst("cudaGPUDirectRDMAWritesOrderingOwner", "hipGPUDirectRDMAWritesOrderingOwner", "numeric_literal"); + subst("cudaGraphDebugDotFlagsEventNodeParams", "hipGraphDebugDotFlagsEventNodeParams", "numeric_literal"); + subst("cudaGraphDebugDotFlagsExtSemasSignalNodeParams", "hipGraphDebugDotFlagsExtSemasSignalNodeParams", "numeric_literal"); + subst("cudaGraphDebugDotFlagsExtSemasWaitNodeParams", "hipGraphDebugDotFlagsExtSemasWaitNodeParams", "numeric_literal"); + subst("cudaGraphDebugDotFlagsHandles", "hipGraphDebugDotFlagsHandles", "numeric_literal"); + subst("cudaGraphDebugDotFlagsHostNodeParams", "hipGraphDebugDotFlagsHostNodeParams", "numeric_literal"); + subst("cudaGraphDebugDotFlagsKernelNodeAttributes", "hipGraphDebugDotFlagsKernelNodeAttributes", "numeric_literal"); + subst("cudaGraphDebugDotFlagsKernelNodeParams", "hipGraphDebugDotFlagsKernelNodeParams", "numeric_literal"); + subst("cudaGraphDebugDotFlagsMemcpyNodeParams", "hipGraphDebugDotFlagsMemcpyNodeParams", "numeric_literal"); + subst("cudaGraphDebugDotFlagsMemsetNodeParams", "hipGraphDebugDotFlagsMemsetNodeParams", "numeric_literal"); + subst("cudaGraphDebugDotFlagsVerbose", "hipGraphDebugDotFlagsVerbose", "numeric_literal"); + subst("cudaGraphExecUpdateError", "hipGraphExecUpdateError", "numeric_literal"); + subst("cudaGraphExecUpdateErrorFunctionChanged", "hipGraphExecUpdateErrorFunctionChanged", "numeric_literal"); + subst("cudaGraphExecUpdateErrorNodeTypeChanged", "hipGraphExecUpdateErrorNodeTypeChanged", "numeric_literal"); + subst("cudaGraphExecUpdateErrorNotSupported", "hipGraphExecUpdateErrorNotSupported", "numeric_literal"); + subst("cudaGraphExecUpdateErrorParametersChanged", "hipGraphExecUpdateErrorParametersChanged", "numeric_literal"); + subst("cudaGraphExecUpdateErrorTopologyChanged", "hipGraphExecUpdateErrorTopologyChanged", "numeric_literal"); + subst("cudaGraphExecUpdateErrorUnsupportedFunctionChange", "hipGraphExecUpdateErrorUnsupportedFunctionChange", "numeric_literal"); + subst("cudaGraphExecUpdateSuccess", "hipGraphExecUpdateSuccess", "numeric_literal"); + subst("cudaGraphInstantiateError", "hipGraphInstantiateError", "numeric_literal"); + subst("cudaGraphInstantiateFlagAutoFreeOnLaunch", "hipGraphInstantiateFlagAutoFreeOnLaunch", "numeric_literal"); + subst("cudaGraphInstantiateFlagDeviceLaunch", "hipGraphInstantiateFlagDeviceLaunch", "numeric_literal"); + subst("cudaGraphInstantiateFlagUpload", "hipGraphInstantiateFlagUpload", "numeric_literal"); + subst("cudaGraphInstantiateFlagUseNodePriority", "hipGraphInstantiateFlagUseNodePriority", "numeric_literal"); + subst("cudaGraphInstantiateInvalidStructure", "hipGraphInstantiateInvalidStructure", "numeric_literal"); + subst("cudaGraphInstantiateMultipleDevicesNotSupported", "hipGraphInstantiateMultipleDevicesNotSupported", "numeric_literal"); + 
subst("cudaGraphInstantiateNodeOperationNotSupported", "hipGraphInstantiateNodeOperationNotSupported", "numeric_literal"); + subst("cudaGraphInstantiateSuccess", "hipGraphInstantiateSuccess", "numeric_literal"); + subst("cudaGraphMemAttrReservedMemCurrent", "hipGraphMemAttrReservedMemCurrent", "numeric_literal"); + subst("cudaGraphMemAttrReservedMemHigh", "hipGraphMemAttrReservedMemHigh", "numeric_literal"); + subst("cudaGraphMemAttrUsedMemCurrent", "hipGraphMemAttrUsedMemCurrent", "numeric_literal"); + subst("cudaGraphMemAttrUsedMemHigh", "hipGraphMemAttrUsedMemHigh", "numeric_literal"); + subst("cudaGraphNodeTypeConditional", "hipGraphNodeTypeConditional", "numeric_literal"); + subst("cudaGraphNodeTypeCount", "hipGraphNodeTypeCount", "numeric_literal"); + subst("cudaGraphNodeTypeEmpty", "hipGraphNodeTypeEmpty", "numeric_literal"); + subst("cudaGraphNodeTypeEventRecord", "hipGraphNodeTypeEventRecord", "numeric_literal"); + subst("cudaGraphNodeTypeExtSemaphoreSignal", "hipGraphNodeTypeExtSemaphoreSignal", "numeric_literal"); + subst("cudaGraphNodeTypeExtSemaphoreWait", "hipGraphNodeTypeExtSemaphoreWait", "numeric_literal"); + subst("cudaGraphNodeTypeGraph", "hipGraphNodeTypeGraph", "numeric_literal"); + subst("cudaGraphNodeTypeHost", "hipGraphNodeTypeHost", "numeric_literal"); + subst("cudaGraphNodeTypeKernel", "hipGraphNodeTypeKernel", "numeric_literal"); + subst("cudaGraphNodeTypeMemAlloc", "hipGraphNodeTypeMemAlloc", "numeric_literal"); + subst("cudaGraphNodeTypeMemFree", "hipGraphNodeTypeMemFree", "numeric_literal"); + subst("cudaGraphNodeTypeMemcpy", "hipGraphNodeTypeMemcpy", "numeric_literal"); + subst("cudaGraphNodeTypeMemset", "hipGraphNodeTypeMemset", "numeric_literal"); + subst("cudaGraphNodeTypeWaitEvent", "hipGraphNodeTypeWaitEvent", "numeric_literal"); + subst("cudaGraphUserObjectMove", "hipGraphUserObjectMove", "numeric_literal"); + subst("cudaGraphicsRegisterFlagsNone", "hipGraphicsRegisterFlagsNone", "numeric_literal"); + subst("cudaGraphicsRegisterFlagsReadOnly", "hipGraphicsRegisterFlagsReadOnly", "numeric_literal"); + subst("cudaGraphicsRegisterFlagsSurfaceLoadStore", "hipGraphicsRegisterFlagsSurfaceLoadStore", "numeric_literal"); + subst("cudaGraphicsRegisterFlagsTextureGather", "hipGraphicsRegisterFlagsTextureGather", "numeric_literal"); + subst("cudaGraphicsRegisterFlagsWriteDiscard", "hipGraphicsRegisterFlagsWriteDiscard", "numeric_literal"); + subst("cudaKernelNodeAttributeAccessPolicyWindow", "hipKernelNodeAttributeAccessPolicyWindow", "numeric_literal"); + subst("cudaKernelNodeAttributeCooperative", "hipKernelNodeAttributeCooperative", "numeric_literal"); + subst("cudaLimitMallocHeapSize", "hipLimitMallocHeapSize", "numeric_literal"); + subst("cudaLimitPrintfFifoSize", "hipLimitPrintfFifoSize", "numeric_literal"); + subst("cudaLimitStackSize", "hipLimitStackSize", "numeric_literal"); + subst("cudaMemAccessFlagsProtNone", "hipMemAccessFlagsProtNone", "numeric_literal"); + subst("cudaMemAccessFlagsProtRead", "hipMemAccessFlagsProtRead", "numeric_literal"); + subst("cudaMemAccessFlagsProtReadWrite", "hipMemAccessFlagsProtReadWrite", "numeric_literal"); + subst("cudaMemAdviseSetAccessedBy", "hipMemAdviseSetAccessedBy", "numeric_literal"); + subst("cudaMemAdviseSetPreferredLocation", "hipMemAdviseSetPreferredLocation", "numeric_literal"); + subst("cudaMemAdviseSetReadMostly", "hipMemAdviseSetReadMostly", "numeric_literal"); + subst("cudaMemAdviseUnsetAccessedBy", "hipMemAdviseUnsetAccessedBy", "numeric_literal"); + subst("cudaMemAdviseUnsetPreferredLocation", 
"hipMemAdviseUnsetPreferredLocation", "numeric_literal"); + subst("cudaMemAdviseUnsetReadMostly", "hipMemAdviseUnsetReadMostly", "numeric_literal"); + subst("cudaMemAllocationTypeInvalid", "hipMemAllocationTypeInvalid", "numeric_literal"); + subst("cudaMemAllocationTypeMax", "hipMemAllocationTypeMax", "numeric_literal"); + subst("cudaMemAllocationTypePinned", "hipMemAllocationTypePinned", "numeric_literal"); + subst("cudaMemHandleTypeNone", "hipMemHandleTypeNone", "numeric_literal"); + subst("cudaMemHandleTypePosixFileDescriptor", "hipMemHandleTypePosixFileDescriptor", "numeric_literal"); + subst("cudaMemHandleTypeWin32", "hipMemHandleTypeWin32", "numeric_literal"); + subst("cudaMemHandleTypeWin32Kmt", "hipMemHandleTypeWin32Kmt", "numeric_literal"); + subst("cudaMemLocationTypeDevice", "hipMemLocationTypeDevice", "numeric_literal"); + subst("cudaMemLocationTypeInvalid", "hipMemLocationTypeInvalid", "numeric_literal"); + subst("cudaMemPoolAttrReleaseThreshold", "hipMemPoolAttrReleaseThreshold", "numeric_literal"); + subst("cudaMemPoolAttrReservedMemCurrent", "hipMemPoolAttrReservedMemCurrent", "numeric_literal"); + subst("cudaMemPoolAttrReservedMemHigh", "hipMemPoolAttrReservedMemHigh", "numeric_literal"); + subst("cudaMemPoolAttrUsedMemCurrent", "hipMemPoolAttrUsedMemCurrent", "numeric_literal"); + subst("cudaMemPoolAttrUsedMemHigh", "hipMemPoolAttrUsedMemHigh", "numeric_literal"); + subst("cudaMemPoolReuseAllowInternalDependencies", "hipMemPoolReuseAllowInternalDependencies", "numeric_literal"); + subst("cudaMemPoolReuseAllowOpportunistic", "hipMemPoolReuseAllowOpportunistic", "numeric_literal"); + subst("cudaMemPoolReuseFollowEventDependencies", "hipMemPoolReuseFollowEventDependencies", "numeric_literal"); + subst("cudaMemRangeAttributeAccessedBy", "hipMemRangeAttributeAccessedBy", "numeric_literal"); + subst("cudaMemRangeAttributeLastPrefetchLocation", "hipMemRangeAttributeLastPrefetchLocation", "numeric_literal"); + subst("cudaMemRangeAttributePreferredLocation", "hipMemRangeAttributePreferredLocation", "numeric_literal"); + subst("cudaMemRangeAttributeReadMostly", "hipMemRangeAttributeReadMostly", "numeric_literal"); + subst("cudaMemcpyDefault", "hipMemcpyDefault", "numeric_literal"); + subst("cudaMemcpyDeviceToDevice", "hipMemcpyDeviceToDevice", "numeric_literal"); + subst("cudaMemcpyDeviceToHost", "hipMemcpyDeviceToHost", "numeric_literal"); + subst("cudaMemcpyHostToDevice", "hipMemcpyHostToDevice", "numeric_literal"); + subst("cudaMemcpyHostToHost", "hipMemcpyHostToHost", "numeric_literal"); + subst("cudaMemoryTypeDevice", "hipMemoryTypeDevice", "numeric_literal"); + subst("cudaMemoryTypeHost", "hipMemoryTypeHost", "numeric_literal"); + subst("cudaMemoryTypeManaged", "hipMemoryTypeManaged", "numeric_literal"); + subst("cudaReadModeElementType", "hipReadModeElementType", "numeric_literal"); + subst("cudaReadModeNormalizedFloat", "hipReadModeNormalizedFloat", "numeric_literal"); + subst("cudaResViewFormatFloat1", "hipResViewFormatFloat1", "numeric_literal"); + subst("cudaResViewFormatFloat2", "hipResViewFormatFloat2", "numeric_literal"); + subst("cudaResViewFormatFloat4", "hipResViewFormatFloat4", "numeric_literal"); + subst("cudaResViewFormatHalf1", "hipResViewFormatHalf1", "numeric_literal"); + subst("cudaResViewFormatHalf2", "hipResViewFormatHalf2", "numeric_literal"); + subst("cudaResViewFormatHalf4", "hipResViewFormatHalf4", "numeric_literal"); + subst("cudaResViewFormatNone", "hipResViewFormatNone", "numeric_literal"); + subst("cudaResViewFormatSignedBlockCompressed4", 
"hipResViewFormatSignedBlockCompressed4", "numeric_literal"); + subst("cudaResViewFormatSignedBlockCompressed5", "hipResViewFormatSignedBlockCompressed5", "numeric_literal"); + subst("cudaResViewFormatSignedBlockCompressed6H", "hipResViewFormatSignedBlockCompressed6H", "numeric_literal"); + subst("cudaResViewFormatSignedChar1", "hipResViewFormatSignedChar1", "numeric_literal"); + subst("cudaResViewFormatSignedChar2", "hipResViewFormatSignedChar2", "numeric_literal"); + subst("cudaResViewFormatSignedChar4", "hipResViewFormatSignedChar4", "numeric_literal"); + subst("cudaResViewFormatSignedInt1", "hipResViewFormatSignedInt1", "numeric_literal"); + subst("cudaResViewFormatSignedInt2", "hipResViewFormatSignedInt2", "numeric_literal"); + subst("cudaResViewFormatSignedInt4", "hipResViewFormatSignedInt4", "numeric_literal"); + subst("cudaResViewFormatSignedShort1", "hipResViewFormatSignedShort1", "numeric_literal"); + subst("cudaResViewFormatSignedShort2", "hipResViewFormatSignedShort2", "numeric_literal"); + subst("cudaResViewFormatSignedShort4", "hipResViewFormatSignedShort4", "numeric_literal"); + subst("cudaResViewFormatUnsignedBlockCompressed1", "hipResViewFormatUnsignedBlockCompressed1", "numeric_literal"); + subst("cudaResViewFormatUnsignedBlockCompressed2", "hipResViewFormatUnsignedBlockCompressed2", "numeric_literal"); + subst("cudaResViewFormatUnsignedBlockCompressed3", "hipResViewFormatUnsignedBlockCompressed3", "numeric_literal"); + subst("cudaResViewFormatUnsignedBlockCompressed4", "hipResViewFormatUnsignedBlockCompressed4", "numeric_literal"); + subst("cudaResViewFormatUnsignedBlockCompressed5", "hipResViewFormatUnsignedBlockCompressed5", "numeric_literal"); + subst("cudaResViewFormatUnsignedBlockCompressed6H", "hipResViewFormatUnsignedBlockCompressed6H", "numeric_literal"); + subst("cudaResViewFormatUnsignedBlockCompressed7", "hipResViewFormatUnsignedBlockCompressed7", "numeric_literal"); + subst("cudaResViewFormatUnsignedChar1", "hipResViewFormatUnsignedChar1", "numeric_literal"); + subst("cudaResViewFormatUnsignedChar2", "hipResViewFormatUnsignedChar2", "numeric_literal"); + subst("cudaResViewFormatUnsignedChar4", "hipResViewFormatUnsignedChar4", "numeric_literal"); + subst("cudaResViewFormatUnsignedInt1", "hipResViewFormatUnsignedInt1", "numeric_literal"); + subst("cudaResViewFormatUnsignedInt2", "hipResViewFormatUnsignedInt2", "numeric_literal"); + subst("cudaResViewFormatUnsignedInt4", "hipResViewFormatUnsignedInt4", "numeric_literal"); + subst("cudaResViewFormatUnsignedShort1", "hipResViewFormatUnsignedShort1", "numeric_literal"); + subst("cudaResViewFormatUnsignedShort2", "hipResViewFormatUnsignedShort2", "numeric_literal"); + subst("cudaResViewFormatUnsignedShort4", "hipResViewFormatUnsignedShort4", "numeric_literal"); + subst("cudaResourceTypeArray", "hipResourceTypeArray", "numeric_literal"); + subst("cudaResourceTypeLinear", "hipResourceTypeLinear", "numeric_literal"); + subst("cudaResourceTypeMipmappedArray", "hipResourceTypeMipmappedArray", "numeric_literal"); + subst("cudaResourceTypePitch2D", "hipResourceTypePitch2D", "numeric_literal"); + subst("cudaSharedMemBankSizeDefault", "hipSharedMemBankSizeDefault", "numeric_literal"); + subst("cudaSharedMemBankSizeEightByte", "hipSharedMemBankSizeEightByte", "numeric_literal"); + subst("cudaSharedMemBankSizeFourByte", "hipSharedMemBankSizeFourByte", "numeric_literal"); + subst("cudaStreamAddCaptureDependencies", "hipStreamAddCaptureDependencies", "numeric_literal"); + subst("cudaStreamCaptureModeGlobal", 
"hipStreamCaptureModeGlobal", "numeric_literal"); + subst("cudaStreamCaptureModeRelaxed", "hipStreamCaptureModeRelaxed", "numeric_literal"); + subst("cudaStreamCaptureModeThreadLocal", "hipStreamCaptureModeThreadLocal", "numeric_literal"); + subst("cudaStreamCaptureStatusActive", "hipStreamCaptureStatusActive", "numeric_literal"); + subst("cudaStreamCaptureStatusInvalidated", "hipStreamCaptureStatusInvalidated", "numeric_literal"); + subst("cudaStreamCaptureStatusNone", "hipStreamCaptureStatusNone", "numeric_literal"); + subst("cudaStreamSetCaptureDependencies", "hipStreamSetCaptureDependencies", "numeric_literal"); + subst("cudaSuccess", "hipSuccess", "numeric_literal"); + subst("cudaUserObjectNoDestructorSync", "hipUserObjectNoDestructorSync", "numeric_literal"); + subst("cusolver_int_t", "int", "numeric_literal"); + subst("CUB_MAX", "CUB_MAX", "define"); + subst("CUB_MIN", "CUB_MIN", "define"); + subst("CUB_NAMESPACE_BEGIN", "BEGIN_HIPCUB_NAMESPACE", "define"); + subst("CUB_NAMESPACE_END", "END_HIPCUB_NAMESPACE", "define"); + subst("CUB_PTX_ARCH", "HIPCUB_ARCH", "define"); + subst("CUB_PTX_WARP_THREADS", "HIPCUB_WARP_THREADS", "define"); + subst("CUB_RUNTIME_FUNCTION", "HIPCUB_RUNTIME_FUNCTION", "define"); + subst("CUB_STDERR", "HIPCUB_STDERR", "define"); + subst("CUDART_2_OVER_PI", "HIP_2_OVER_PI", "define"); + subst("CUDART_2_OVER_PI_F", "HIP_2_OVER_PI_F", "define"); + subst("CUDART_3PIO4", "HIP_3PIO4", "define"); + subst("CUDART_3PIO4_F", "HIP_3PIO4_F", "define"); + subst("CUDART_DBL2INT_CVT", "HIP_DBL2INT_CVT", "define"); + subst("CUDART_INF", "HIP_INF", "define"); + subst("CUDART_INF_F", "HIP_INF_F", "define"); + subst("CUDART_L2E", "HIP_L2E", "define"); + subst("CUDART_L2E_F", "HIP_L2E_F", "define"); + subst("CUDART_L2E_HI", "HIP_L2E_HI", "define"); + subst("CUDART_L2E_LO", "HIP_L2E_LO", "define"); + subst("CUDART_L2T", "HIP_L2T", "define"); + subst("CUDART_L2T_F", "HIP_L2T_F", "define"); + subst("CUDART_LG2", "HIP_LG2", "define"); + subst("CUDART_LG2_F", "HIP_LG2_F", "define"); + subst("CUDART_LG2_HI", "HIP_LG2_HI", "define"); + subst("CUDART_LG2_LO", "HIP_LG2_LO", "define"); + subst("CUDART_LG2_X_1024", "HIP_LG2_X_1024", "define"); + subst("CUDART_LG2_X_1075", "HIP_LG2_X_1075", "define"); + subst("CUDART_LGE", "HIP_LGE", "define"); + subst("CUDART_LGE_F", "HIP_LGE_F", "define"); + subst("CUDART_LGE_HI", "HIP_LGE_HI", "define"); + subst("CUDART_LGE_LO", "HIP_LGE_LO", "define"); + subst("CUDART_LN2", "HIP_LN2", "define"); + subst("CUDART_LN2_F", "HIP_LN2_F", "define"); + subst("CUDART_LN2_HI", "HIP_LN2_HI", "define"); + subst("CUDART_LN2_LO", "HIP_LN2_LO", "define"); + subst("CUDART_LN2_X_1024", "HIP_LN2_X_1024", "define"); + subst("CUDART_LN2_X_1025", "HIP_LN2_X_1025", "define"); + subst("CUDART_LN2_X_1075", "HIP_LN2_X_1075", "define"); + subst("CUDART_LNPI", "HIP_LNPI", "define"); + subst("CUDART_LNPI_F", "HIP_LNPI_F", "define"); + subst("CUDART_LNT", "HIP_LNT", "define"); + subst("CUDART_LNT_F", "HIP_LNT_F", "define"); + subst("CUDART_LNT_HI", "HIP_LNT_HI", "define"); + subst("CUDART_LNT_LO", "HIP_LNT_LO", "define"); + subst("CUDART_MAX_NORMAL_F", "HIP_MAX_NORMAL_F", "define"); + subst("CUDART_MIN_DENORM", "HIP_MIN_DENORM", "define"); + subst("CUDART_MIN_DENORM_F", "HIP_MIN_DENORM_F", "define"); + subst("CUDART_NAN", "HIP_NAN", "define"); + subst("CUDART_NAN_F", "HIP_NAN_F", "define"); + subst("CUDART_NEG_ZERO", "HIP_NEG_ZERO", "define"); + subst("CUDART_NEG_ZERO_F", "HIP_NEG_ZERO_F", "define"); + subst("CUDART_NORM_HUGE_F", "HIP_NORM_HUGE_F", "define"); + subst("CUDART_ONE", 
"HIP_ONE", "define"); + subst("CUDART_ONE_F", "HIP_ONE_F", "define"); + subst("CUDART_PI", "HIP_PI", "define"); + subst("CUDART_PIO2", "HIP_PIO2", "define"); + subst("CUDART_PIO2_F", "HIP_PIO2_F", "define"); + subst("CUDART_PIO2_HI", "HIP_PIO2_HI", "define"); + subst("CUDART_PIO2_LO", "HIP_PIO2_LO", "define"); + subst("CUDART_PIO4", "HIP_PIO4", "define"); + subst("CUDART_PIO4_F", "HIP_PIO4_F", "define"); + subst("CUDART_PIO4_HI", "HIP_PIO4_HI", "define"); + subst("CUDART_PIO4_LO", "HIP_PIO4_LO", "define"); + subst("CUDART_PI_F", "HIP_PI_F", "define"); + subst("CUDART_PI_HI", "HIP_PI_HI", "define"); + subst("CUDART_PI_LO", "HIP_PI_LO", "define"); + subst("CUDART_REMQUO_BITS_F", "HIP_REMQUO_BITS_F", "define"); + subst("CUDART_REMQUO_MASK_F", "HIP_REMQUO_MASK_F", "define"); + subst("CUDART_SQRT_2OPI", "HIP_SQRT_2OPI", "define"); + subst("CUDART_SQRT_2PI", "HIP_SQRT_2PI", "define"); + subst("CUDART_SQRT_2PI_HI", "HIP_SQRT_2PI_HI", "define"); + subst("CUDART_SQRT_2PI_LO", "HIP_SQRT_2PI_LO", "define"); + subst("CUDART_SQRT_2_OVER_PI_F", "HIP_SQRT_2_OVER_PI_F", "define"); + subst("CUDART_SQRT_HALF", "HIP_SQRT_HALF", "define"); + subst("CUDART_SQRT_HALF_F", "HIP_SQRT_HALF_F", "define"); + subst("CUDART_SQRT_HALF_HI", "HIP_SQRT_HALF_HI", "define"); + subst("CUDART_SQRT_HALF_HI_F", "HIP_SQRT_HALF_HI_F", "define"); + subst("CUDART_SQRT_HALF_LO", "HIP_SQRT_HALF_LO", "define"); + subst("CUDART_SQRT_HALF_LO_F", "HIP_SQRT_HALF_LO_F", "define"); + subst("CUDART_SQRT_PIO2", "HIP_SQRT_PIO2", "define"); + subst("CUDART_SQRT_PIO2_HI", "HIP_SQRT_PIO2_HI", "define"); + subst("CUDART_SQRT_PIO2_LO", "HIP_SQRT_PIO2_LO", "define"); + subst("CUDART_SQRT_TWO", "HIP_SQRT_TWO", "define"); + subst("CUDART_SQRT_TWO_F", "HIP_SQRT_TWO_F", "define"); + subst("CUDART_THIRD", "HIP_THIRD", "define"); + subst("CUDART_THIRD_F", "HIP_THIRD_F", "define"); + subst("CUDART_TRIG_PLOSS", "HIP_TRIG_PLOSS", "define"); + subst("CUDART_TRIG_PLOSS_F", "HIP_TRIG_PLOSS_F", "define"); + subst("CUDART_TWOTHIRD", "HIP_TWOTHIRD", "define"); + subst("CUDART_TWO_TO_126_F", "HIP_TWO_TO_126_F", "define"); + subst("CUDART_TWO_TO_23", "HIP_TWO_TO_23", "define"); + subst("CUDART_TWO_TO_23_F", "HIP_TWO_TO_23_F", "define"); + subst("CUDART_TWO_TO_24_F", "HIP_TWO_TO_24_F", "define"); + subst("CUDART_TWO_TO_31_F", "HIP_TWO_TO_31_F", "define"); + subst("CUDART_TWO_TO_32_F", "HIP_TWO_TO_32_F", "define"); + subst("CUDART_TWO_TO_52", "HIP_TWO_TO_52", "define"); + subst("CUDART_TWO_TO_53", "HIP_TWO_TO_53", "define"); + subst("CUDART_TWO_TO_54", "HIP_TWO_TO_54", "define"); + subst("CUDART_TWO_TO_M1022", "HIP_TWO_TO_M1022", "define"); + subst("CUDART_TWO_TO_M126_F", "HIP_TWO_TO_M126_F", "define"); + subst("CUDART_TWO_TO_M54", "HIP_TWO_TO_M54", "define"); + subst("CUDART_ZERO", "HIP_ZERO", "define"); + subst("CUDART_ZERO_F", "HIP_ZERO_F", "define"); + subst("CUDA_ARRAY3D_CUBEMAP", "hipArrayCubemap", "define"); + subst("CUDA_ARRAY3D_LAYERED", "hipArrayLayered", "define"); + subst("CUDA_ARRAY3D_SURFACE_LDST", "hipArraySurfaceLoadStore", "define"); + subst("CUDA_ARRAY3D_TEXTURE_GATHER", "hipArrayTextureGather", "define"); + subst("CUDA_COOPERATIVE_LAUNCH_MULTI_DEVICE_NO_POST_LAUNCH_SYNC", "hipCooperativeLaunchMultiDeviceNoPostSync", "define"); + subst("CUDA_COOPERATIVE_LAUNCH_MULTI_DEVICE_NO_PRE_LAUNCH_SYNC", "hipCooperativeLaunchMultiDeviceNoPreSync", "define"); + subst("CUDA_EXTERNAL_MEMORY_DEDICATED", "hipExternalMemoryDedicated", "define"); + subst("CUDA_IPC_HANDLE_SIZE", "HIP_IPC_HANDLE_SIZE", "define"); + subst("CU_DEVICE_CPU", "hipCpuDeviceId", "define"); + 
subst("CU_DEVICE_INVALID", "hipInvalidDeviceId", "define"); + subst("CU_IPC_HANDLE_SIZE", "HIP_IPC_HANDLE_SIZE", "define"); + subst("CU_LAUNCH_PARAM_BUFFER_POINTER", "HIP_LAUNCH_PARAM_BUFFER_POINTER", "define"); + subst("CU_LAUNCH_PARAM_BUFFER_SIZE", "HIP_LAUNCH_PARAM_BUFFER_SIZE", "define"); + subst("CU_LAUNCH_PARAM_END", "HIP_LAUNCH_PARAM_END", "define"); + subst("CU_MEMHOSTALLOC_DEVICEMAP", "hipHostMallocMapped", "define"); + subst("CU_MEMHOSTALLOC_PORTABLE", "hipHostMallocPortable", "define"); + subst("CU_MEMHOSTALLOC_WRITECOMBINED", "hipHostMallocWriteCombined", "define"); + subst("CU_MEMHOSTREGISTER_DEVICEMAP", "hipHostRegisterMapped", "define"); + subst("CU_MEMHOSTREGISTER_IOMEMORY", "hipHostRegisterIoMemory", "define"); + subst("CU_MEMHOSTREGISTER_PORTABLE", "hipHostRegisterPortable", "define"); + subst("CU_MEMHOSTREGISTER_READ_ONLY", "hipHostRegisterReadOnly", "define"); + subst("CU_STREAM_PER_THREAD", "hipStreamPerThread", "define"); + subst("CU_TRSA_OVERRIDE_FORMAT", "HIP_TRSA_OVERRIDE_FORMAT", "define"); + subst("CU_TRSF_NORMALIZED_COORDINATES", "HIP_TRSF_NORMALIZED_COORDINATES", "define"); + subst("CU_TRSF_READ_AS_INTEGER", "HIP_TRSF_READ_AS_INTEGER", "define"); + subst("CU_TRSF_SRGB", "HIP_TRSF_SRGB", "define"); + subst("CubDebug", "HipcubDebug", "define"); + subst("REGISTER_CUDA_OPERATOR", "REGISTER_HIP_OPERATOR", "define"); + subst("REGISTER_CUDA_OPERATOR_CREATOR", "REGISTER_HIP_OPERATOR_CREATOR", "define"); + subst("_CubLog", "_HipcubLog", "define"); + subst("__CUB_ALIGN_BYTES", "__HIPCUB_ALIGN_BYTES", "define"); + subst("__CUDACC__", "__HIPCC__", "define"); + subst("cudaArrayCubemap", "hipArrayCubemap", "define"); + subst("cudaArrayDefault", "hipArrayDefault", "define"); + subst("cudaArrayLayered", "hipArrayLayered", "define"); + subst("cudaArraySurfaceLoadStore", "hipArraySurfaceLoadStore", "define"); + subst("cudaArrayTextureGather", "hipArrayTextureGather", "define"); + subst("cudaCooperativeLaunchMultiDeviceNoPostSync", "hipCooperativeLaunchMultiDeviceNoPostSync", "define"); + subst("cudaCooperativeLaunchMultiDeviceNoPreSync", "hipCooperativeLaunchMultiDeviceNoPreSync", "define"); + subst("cudaCpuDeviceId", "hipCpuDeviceId", "define"); + subst("cudaDeviceBlockingSync", "hipDeviceScheduleBlockingSync", "define"); + subst("cudaDeviceLmemResizeToMax", "hipDeviceLmemResizeToMax", "define"); + subst("cudaDeviceMapHost", "hipDeviceMapHost", "define"); + subst("cudaDeviceScheduleAuto", "hipDeviceScheduleAuto", "define"); + subst("cudaDeviceScheduleBlockingSync", "hipDeviceScheduleBlockingSync", "define"); + subst("cudaDeviceScheduleMask", "hipDeviceScheduleMask", "define"); + subst("cudaDeviceScheduleSpin", "hipDeviceScheduleSpin", "define"); + subst("cudaDeviceScheduleYield", "hipDeviceScheduleYield", "define"); + subst("cudaEventBlockingSync", "hipEventBlockingSync", "define"); + subst("cudaEventDefault", "hipEventDefault", "define"); + subst("cudaEventDisableTiming", "hipEventDisableTiming", "define"); + subst("cudaEventInterprocess", "hipEventInterprocess", "define"); + subst("cudaExternalMemoryDedicated", "hipExternalMemoryDedicated", "define"); + subst("cudaHostAllocDefault", "hipHostMallocDefault", "define"); + subst("cudaHostAllocMapped", "hipHostMallocMapped", "define"); + subst("cudaHostAllocPortable", "hipHostMallocPortable", "define"); + subst("cudaHostAllocWriteCombined", "hipHostMallocWriteCombined", "define"); + subst("cudaHostRegisterDefault", "hipHostRegisterDefault", "define"); + subst("cudaHostRegisterIoMemory", "hipHostRegisterIoMemory", "define"); + 
subst("cudaHostRegisterMapped", "hipHostRegisterMapped", "define"); + subst("cudaHostRegisterPortable", "hipHostRegisterPortable", "define"); + subst("cudaHostRegisterReadOnly", "hipHostRegisterReadOnly", "define"); + subst("cudaInvalidDeviceId", "hipInvalidDeviceId", "define"); + subst("cudaIpcMemLazyEnablePeerAccess", "hipIpcMemLazyEnablePeerAccess", "define"); + subst("cudaMemAttachGlobal", "hipMemAttachGlobal", "define"); + subst("cudaMemAttachHost", "hipMemAttachHost", "define"); + subst("cudaMemAttachSingle", "hipMemAttachSingle", "define"); + subst("cudaOccupancyDefault", "hipOccupancyDefault", "define"); + subst("cudaOccupancyDisableCachingOverride", "hipOccupancyDisableCachingOverride", "define"); + subst("cudaStreamDefault", "hipStreamDefault", "define"); + subst("cudaStreamNonBlocking", "hipStreamNonBlocking", "define"); + subst("cudaStreamPerThread", "hipStreamPerThread", "define"); + subst("cudaTextureType1D", "hipTextureType1D", "define"); + subst("cudaTextureType1DLayered", "hipTextureType1DLayered", "define"); + subst("cudaTextureType2D", "hipTextureType2D", "define"); + subst("cudaTextureType2DLayered", "hipTextureType2DLayered", "define"); + subst("cudaTextureType3D", "hipTextureType3D", "define"); + subst("cudaTextureTypeCubemap", "hipTextureTypeCubemap", "define"); + subst("cudaTextureTypeCubemapLayered", "hipTextureTypeCubemapLayered", "define"); +} + +# CUDA Kernel Launch Syntax +sub transformKernelLaunch { + no warnings qw/uninitialized/; + my $k = 0; + + # kern<...><<>>() syntax + $k += s/([:|\w]+)\s*<(.+)>\s*<<<\s*([^,\(\)]+|[\w\s:]*\([\w|\s|,|:|\+|\*|\-|\/|(?R)]+\))\s*,\s*([^,\(\)]+|[\w\s:]*\([\w|\s|,|:|\+|\*|\-|\/|(?R)]+\))\s*,\s*([^,\(\)]+|[\w\s:]*\([\w|\s|,|:|\+|\*|\-|\/|(?R)]+\))\s*,\s*([^,\(\)]+|[\w\s:]*\([\w|\s|,|:|\+|\*|\-|\/|(?R)]+\))\s*>>>\s*\(\s*\)/hipLaunchKernelGGL(HIP_KERNEL_NAME($1<$2>), $3, $4, $5, $6)/g; + # kern<...><<>>(...) syntax + $k += s/([:|\w]+)\s*<(.+)>\s*<<<\s*([^,\(\)]+|[\w\s:]*\([\w|\s|,|:|\+|\*|\-|\/|(?R)]+\))\s*,\s*([^,\(\)]+|[\w\s:]*\([\w|\s|,|:|\+|\*|\-|\/|(?R)]+\))\s*,\s*([^,\(\)]+|[\w\s:]*\([\w|\s|,|:|\+|\*|\-|\/|(?R)]+\))\s*,\s*([^,\(\)]+|[\w\s:]*\([\w|\s|,|:|\+|\*|\-|\/|(?R)]+\))\s*>>>\s*\(/hipLaunchKernelGGL(HIP_KERNEL_NAME($1<$2>), $3, $4, $5, $6, /g; + # kern<<>>() syntax + $k += s/([:|\w]+)\s*<<<\s*([^,\(\)]+|[\w\s:]*\([\w|\s|,|:|\+|\*|\-|\/|(?R)]+\))\s*,\s*([^,\(\)]+|[\w\s:]*\([\w|\s|,|:|\+|\*|\-|\/|(?R)]+\))\s*,\s*([^,\(\)]+|[\w\s:]*\([\w|\s|,|:|\+|\*|\-|\/|(?R)]+\))\s*,\s*([^,\(\)]+|[\w\s:]*\([\w|\s|,|:|\+|\*|\-|\/|(?R)]+\))\s*>>>\s*\(\s*\)/hipLaunchKernelGGL($1, $2, $3, $4, $5)/g; + # kern<<>>(...) syntax + $k += s/([:|\w]+)\s*<<<\s*([^,\(\)]+|[\w\s:]*\([\w|\s|,|:|\+|\*|\-|\/|(?R)]+\))\s*,\s*([^,\(\)]+|[\w\s:]*\([\w|\s|,|:|\+|\*|\-|\/|(?R)]+\))\s*,\s*([^,\(\)]+|[\w\s:]*\([\w|\s|,|:|\+|\*|\-|\/|(?R)]+\))\s*,\s*([^,\(\)]+|[\w\s:]*\([\w|\s|,|:|\+|\*|\-|\/|(?R)]+\))\s*>>>\s*\(/hipLaunchKernelGGL($1, $2, $3, $4, $5, /g; + + # kern<...><<>>() syntax + $k += s/([:|\w]+)\s*<(.+)>\s*<<<\s*([^,\(\)]+|[\w\s:]*\([\w|\s|,|:|\+|\*|\-|\/|(?R)]+\))\s*,\s*([^,\(\)]+|[\w\s:]*\([\w|\s|,|:|\+|\*|\-|\/|(?R)]+\))\s*,\s*([^,\(\)]+|[\w\s:]*\([\w|\s|,|:|\+|\*|\-|\/|(?R)]+\))\s*>>>\s*\(\s*\)/hipLaunchKernelGGL(HIP_KERNEL_NAME($1<$2>), $3, $4, $5, 0)/g; + # kern<...><<>>(...) 
syntax + $k += s/([:|\w]+)\s*<(.+)>\s*<<<\s*([^,\(\)]+|[\w\s:]*\([\w|\s|,|:|\+|\*|\-|\/|(?R)]+\))\s*,\s*([^,\(\)]+|[\w\s:]*\([\w|\s|,|:|\+|\*|\-|\/|(?R)]+\))\s*,\s*([^,\(\)]+|[\w\s:]*\([\w|\s|,|:|\+|\*|\-|\/|(?R)]+\))\s*>>>\s*\(/hipLaunchKernelGGL(HIP_KERNEL_NAME($1<$2>), $3, $4, $5, 0, /g; + # kern<<>>() syntax + $k += s/([:|\w]+)\s*<<<\s*([^,\(\)]+|[\w\s:]*\([\w|\s|,|:|\+|\*|\-|\/|(?R)]+\))\s*,\s*([^,\(\)]+|[\w\s:]*\([\w|\s|,|:|\+|\*|\-|\/|(?R)]+\))\s*,\s*([^,\(\)]+|[\w\s:]*\([\w|\s|,|:|\+|\*|\-|\/|(?R)]+\))\s*>>>\s*\(\s*\)/hipLaunchKernelGGL($1, $2, $3, $4, 0)/g; + # kern<<>>(...) syntax + $k += s/([:|\w]+)\s*<<<\s*([^,\(\)]+|[\w\s:]*\([\w|\s|,|:|\+|\*|\-|\/|(?R)]+\))\s*,\s*([^,\(\)]+|[\w\s:]*\([\w|\s|,|:|\+|\*|\-|\/|(?R)]+\))\s*,\s*([^,\(\)]+|[\w\s:]*\([\w|\s|,|:|\+|\*|\-|\/|(?R)]+\))\s*>>>\s*\(/hipLaunchKernelGGL($1, $2, $3, $4, 0, /g; + + # kern<...><<>>() syntax + $k += s/([:|\w]+)\s*<(.+)>\s*<<<\s*([^,\(\)]+|[\w\s:]*\([\w|\s|,|:|\+|\*|\-|\/|(?R)]+\))\s*,\s*([^,\(\)]+|[\w\s:]*\([\w|\s|,|:|\+|\*|\-|\/|(?R)]+\))\s*>>>\s*\(\s*\)/hipLaunchKernelGGL(HIP_KERNEL_NAME($1<$2>), $3, $4, 0, 0)/g; + # kern<...><<>>(...) syntax + $k += s/([:|\w]+)\s*<(.+)>\s*<<<\s*([^,\(\)]+|[\w\s:]*\([\w|\s|,|:|\+|\*|\-|\/|(?R)]+\))\s*,\s*([^,\(\)]+|[\w\s:]*\([\w|\s|,|:|\+|\*|\-|\/|(?R)]+\))\s*>>>\s*\(/hipLaunchKernelGGL(HIP_KERNEL_NAME($1<$2>), $3, $4, 0, 0, /g; + # kern<<>>() syntax + $k += s/([:|\w]+)\s*<<<\s*([^,\(\)]+|[\w\s:]*\([\w|\s|,|:|\+|\*|\-|\/|(?R)]+\))\s*,\s*([^,\(\)]+|[\w\s:]*\([\w|\s|,|:|\+|\*|\-|\/|(?R)]+\))\s*>>>\s*\(\s*\)/hipLaunchKernelGGL($1, $2, $3, 0, 0)/g; + # kern<<>>(...) syntax + $k += s/([:|\w]+)\s*<<<\s*([^,\(\)]+|[\w\s:]*\([\w|\s|,|:|\+|\*|\-|\/|(?R)]+\))\s*,\s*([^,\(\)]+|[\w\s:]*\([\w|\s|,|:|\+|\*|\-|\/|(?R)]+\))\s*>>>\s*\(/hipLaunchKernelGGL($1, $2, $3, 0, 0, /g; + + if ($k) { + $ft{'kernel_launch'} += $k; + $Tkernels{$1}++; + } +} + +sub transformCubNamespace { + my $k = 0; + $k += s/using\s*namespace\s*cub/using namespace hipcub/g; + $k += s/\bcub::\b/hipcub::/g; + return $k; +} + +sub transformHostFunctions { + my $k = 0; + foreach $func ( + "hipMemcpyToSymbol", + "hipMemcpyToSymbolAsync" + ) + { + $k += s/(?\($2\),/g; + } + foreach $func ( + "hipFuncGetAttributes" + ) + { + $k += s/(?\($4\)$5/g; + } + foreach $func ( + "hipGraphExecMemcpyNodeSetParamsToSymbol", + "hipGraphMemcpyNodeSetParamsFromSymbol" + ) + { + $k += s/(?", $outfile) or die "error: could not open $outfile"; + $OUTFILE = OUTFILE; + } else { + open(INFILE,"<", $fileName) or die "error: could not open $fileName"; + if ($hipFileName ne "") { + open(OUTFILE,">", $hipFileName) or die "error: could not open $hipFileName"; + $OUTFILE = OUTFILE; + } else { + $OUTFILE = STDOUT; + } + } + # Count of transforms in this file + clearStats(\%ft, \@statNames); + my $countIncludes = 0; + my $countKeywords = 0; + my $warnings = 0; + my %warningTags; + my $lineCount = 0; + %tags = (); + %convertedTags = (); + %tagsToConvertedTags = (); + undef $/; + # Read whole file at once, so we can match newlines + while () { + $countKeywords += m/__global__/; + $countKeywords += m/__shared__/; + unless ($quiet_warnings) { + my @lines = split /\n/, $_; + # Copy the whole file + my $tmp = $_; + my $line_num = 0; + foreach (@lines) { + $line_num++; + if (!$experimental) { + $s = warnExperimentalFunctions($line_num); + $warnings += $s; + } + $s = warnRemovedFunctions($line_num); + $warnings += $s; + $s = warnDeprecatedFunctions($line_num); + $warnings += $s; + $s = warnUnsupportedFunctions($line_num); + $warnings += $s; + if ($roc) { + $s = 
warnRocOnlyUnsupportedFunctions($line_num); + $warnings += $s; + } else { + $s = warnHipOnlyUnsupportedFunctions($line_num); + $warnings += $s; + } + $s = warnUnsupportedDeviceFunctions($line_num); + $warnings += $s; + } + $_ = $tmp; + } + if ($roc) { + rocSubstitutions(); + } + if ($experimental) { + experimentalSubstitutions(); + } + simpleSubstitutions(); + if (!$cuda_kernel_execution_syntax || $hip_kernel_execution_syntax) { + transformKernelLaunch(); + } + transformCubNamespace(); + my $hasDeviceCode = $countKeywords + $ft{'device_function'}; + unless ($quiet_warnings) { + # Copy into array of lines, process line-by-line to show warnings + if ($hasDeviceCode or (/\bcu|CU/) or (/<<<.*>>>/)) { + my @lines = split /\n/, $_; + # Copy the whole file + my $tmp = $_; + my $line_num = 0; + foreach (@lines) { + $line_num++; + # Remove any whitelisted words + foreach $w (@whitelist) { + redo if s/\b$w\b/ZAP/ + } + my $tag; + if ((/(\bcuda[A-Z]\w+)/) or ((/<<<.*>>>/) and ($hip_kernel_execution_syntax))) { + # Flag any remaining code that look like cuda API calls: may want to add these to hipify + $tag = (defined $1) ? $1 : "Launch"; + } + if (defined $tag) { + $warnings++; + $warningTags{$tag}++; + print STDERR " warning: $fileName:#$line_num : $_\n"; + } + } + $_ = $tmp; + } + } + if ($hasDeviceCode > 0) { + $ft{'device_function'} += countSupportedDeviceFunctions(); + } + transformHostFunctions(); + # TODO: would like to move this code outside loop but it uses $_ which contains the whole file + unless ($no_output) { + my $apiCalls = $ft{'error'} + $ft{'init'} + $ft{'version'} + $ft{'device'} + $ft{'context'} + $ft{'module'} + $ft{'library'} + $ft{'memory'} + $ft{'virtual_memory'} + $ft{'ordered_memory'} + $ft{'multicast'} + $ft{'unified'} + $ft{'stream'} + $ft{'event'} + $ft{'external_resource'} + $ft{'stream_memory'} + $ft{'execution'} + $ft{'graph'} + $ft{'occupancy'} + $ft{'texture'} + $ft{'surface'} + $ft{'tensor'} + $ft{'peer'} + $ft{'graphics'} + $ft{'driver_entry_point'} + $ft{'cpp'} + $ft{'coredump'} + $ft{'driver_interact'} + $ft{'profiler'} + $ft{'openGL'} + $ft{'D3D9'} + $ft{'D3D10'} + $ft{'D3D11'} + $ft{'VDPAU'} + $ft{'EGL'} + $ft{'thread'} + $ft{'complex'} + $ft{'library'} + $ft{'device_library'} + $ft{'device_type'} + $ft{'include'} + $ft{'include_cuda_main_header'} + $ft{'include_cuda_main_header_v2'} + $ft{'type'} + $ft{'literal'} + $ft{'numeric_literal'} + $ft{'define'}; + my $kernStuff = $hasDeviceCode + $ft{'kernel_launch'} + $ft{'device_function'}; + my $totalCalls = $apiCalls + $kernStuff; + $is_dos = m/\r\n$/; + if ($totalCalls and ($countIncludes == 0) and ($kernStuff != 0)) { + # TODO: implement hipify-clang's logic with header files AMAP + print $OUTFILE '#include "hip/hip_runtime.h"' . ($is_dos ? 
"\r\n" : "\n"); + } + print $OUTFILE "$_"; + } + $lineCount = $_ =~ tr/\n//; + } + my $totalConverted = totalStats(\%ft); + if (($totalConverted+$warnings) and $print_stats) { + printStats(\%ft, $warnings, $lineCount, $fileName, 0); + } + # Update totals for all files + addStats(\%tt, \%ft); + $Twarnings += $warnings; + $TlineCount += $lineCount; + foreach $key (keys %warningTags) { + $TwarningTags{$key} += $warningTags{$key}; + } + } # Unless filtered directory or file +} +# Print total stats for all files processed +if ($print_stats and ($fileCount > 1)) { + printStats(\%tt, $Twarnings, $TlineCount, "GLOBAL", 1); + print STDERR "\n"; +} diff --git a/tools/ci_build/op_registration_utils.py b/tools/ci_build/op_registration_utils.py index 811ce424eae10..b61188fa74f1c 100644 --- a/tools/ci_build/op_registration_utils.py +++ b/tools/ci_build/op_registration_utils.py @@ -5,10 +5,11 @@ Utilities to help process files containing kernel registrations. """ +from __future__ import annotations + import os import pathlib import sys -import typing from logger import get_logger @@ -88,12 +89,12 @@ class RegistrationProcessor: def process_registration( self, - lines: typing.List[str], + lines: list[str], domain: str, operator: str, start_version: int, - end_version: typing.Optional[int] = None, - type: typing.Optional[str] = None, + end_version: int | None = None, + type: str | None = None, ): """ Process lines that contain a kernel registration. @@ -119,7 +120,7 @@ def ok(self): return False # return False as the derived class must override to report the real status -def _process_lines(lines: typing.List[str], offset: int, registration_processor: RegistrationProcessor): +def _process_lines(lines: list[str], offset: int, registration_processor: RegistrationProcessor): """ Process one or more lines that contain a kernel registration. Merge lines if split over multiple, and call registration_processor.process_registration with the original lines @@ -139,7 +140,7 @@ def _process_lines(lines: typing.List[str], offset: int, registration_processor: onnx_two_typed_op_len = len(onnx_two_typed_op) onnx_versioned_two_typed_op = "ONNX_OPERATOR_VERSIONED_TWO_TYPED_KERNEL_CLASS_NAME" onnx_versioned_two_typed_op_len = len(onnx_versioned_two_typed_op) - end_marks = tuple([");", ")>", ")>,", ")>,};", ")>};"]) + end_marks = (");", ")>", ")>,", ")>,};", ")>};") end_mark = "" lines_to_process = [] @@ -236,9 +237,7 @@ def _process_lines(lines: typing.List[str], offset: int, registration_processor: return offset + 1 -def process_kernel_registration_file( - filename: typing.Union[str, pathlib.Path], registration_processor: RegistrationProcessor -): +def process_kernel_registration_file(filename: str | pathlib.Path, registration_processor: RegistrationProcessor): """ Process a kernel registration file using registration_processor. :param filename: Path to file containing kernel registrations. diff --git a/tools/ci_build/op_registration_validator.py b/tools/ci_build/op_registration_validator.py index d92050a31f967..6cc7f3bb5ec6d 100644 --- a/tools/ci_build/op_registration_validator.py +++ b/tools/ci_build/op_registration_validator.py @@ -5,6 +5,8 @@ Validate ORT kernel registrations. 
""" +from __future__ import annotations + import argparse import dataclasses import itertools @@ -37,8 +39,8 @@ class RegistrationInfo: domain: str operator: str start_version: int - end_version: typing.Optional[int] - lines: typing.List[str] + end_version: int | None + lines: list[str] def domain_and_op_str(self): return f"{self.domain}:{self.operator}" @@ -50,16 +52,16 @@ def _log_registration_error(r: RegistrationInfo, message: str): class RegistrationValidator(op_registration_utils.RegistrationProcessor): def __init__(self): - self.all_registrations: typing.List[RegistrationInfo] = [] + self.all_registrations: list[RegistrationInfo] = [] def process_registration( self, - lines: typing.List[str], + lines: list[str], domain: str, operator: str, start_version: int, - end_version: typing.Optional[int] = None, - type: typing.Optional[str] = None, + end_version: int | None = None, + type: str | None = None, ): self.all_registrations.append( RegistrationInfo( @@ -114,7 +116,7 @@ def _validate_registrations_for_domain_and_op(self, registrations: typing.Iterat return num_invalid_registrations - def _validate_registration(self, r: RegistrationInfo, next_r: typing.Optional[RegistrationInfo]) -> bool: + def _validate_registration(self, r: RegistrationInfo, next_r: RegistrationInfo | None) -> bool: """ Validates a registration, `r`, with the next one in sorted order for a single domain and op, `next_r`, and returns whether it is valid. diff --git a/tools/ci_build/patch_manylinux.py b/tools/ci_build/patch_manylinux.py index 0d1cb37cc40ac..af03b594d9a69 100644 --- a/tools/ci_build/patch_manylinux.py +++ b/tools/ci_build/patch_manylinux.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. +from __future__ import annotations import argparse import os diff --git a/tools/ci_build/reduce_op_kernels.py b/tools/ci_build/reduce_op_kernels.py index df6bbf7a4058e..c917c544a557c 100755 --- a/tools/ci_build/reduce_op_kernels.py +++ b/tools/ci_build/reduce_op_kernels.py @@ -1,6 +1,7 @@ # !/usr/bin/env python3 # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. +from __future__ import annotations import argparse import io @@ -28,7 +29,7 @@ def _adapt_filters_for_extended_minimal_build( - base_required_ops: typing.Optional[dict], base_op_type_impl_filter: typing.Optional[OpTypeImplFilterInterface] + base_required_ops: dict | None, base_op_type_impl_filter: OpTypeImplFilterInterface | None ): """ Adapts the values returned by parse_config() for an extended minimal build or higher. 
@@ -68,7 +69,7 @@ def _adapt_filters_for_extended_minimal_build( if base_required_ops is not None: adapted_required_ops = base_required_ops.copy() for domain, optype, opset in extended_minimal_build_required_op_ids: - adapted_required_ops.setdefault(domain, dict()).setdefault(opset, set()).add(optype) + adapted_required_ops.setdefault(domain, {}).setdefault(opset, set()).add(optype) adapted_op_type_impl_filter = None if base_op_type_impl_filter is not None: @@ -77,7 +78,7 @@ class _AdaptedFilter(OpTypeImplFilterInterface): def __init__( self, filter_to_adapt: OpTypeImplFilterInterface, - required_domain_and_optypes: typing.Set[typing.Tuple[str, str]], + required_domain_and_optypes: set[tuple[str, str]], ): self.filter_to_adapt = filter_to_adapt self.required_domain_and_optypes = required_domain_and_optypes @@ -107,17 +108,15 @@ class _ExcludingRegistrationProcessor(op_registration_utils.RegistrationProcesso def __init__( self, - required_ops: typing.Optional[dict], - op_type_impl_filter: typing.Optional[OpTypeImplFilterInterface], + required_ops: dict | None, + op_type_impl_filter: OpTypeImplFilterInterface | None, output_file: io.TextIOWrapper, ): self._required_ops = required_ops self._op_type_impl_filter = op_type_impl_filter self._output_file = output_file - def _is_op_required( - self, domain: str, operator: str, start_version: int, end_version: typing.Optional[int] - ) -> bool: + def _is_op_required(self, domain: str, operator: str, start_version: int, end_version: int | None) -> bool: """See if an op is required.""" if self._required_ops is None: return True @@ -134,12 +133,12 @@ def _is_op_required( def process_registration( self, - lines: typing.List[str], + lines: list[str], constant_for_domain: str, operator: str, start_version: int, - end_version: typing.Optional[int] = None, - type: typing.Optional[str] = None, + end_version: int | None = None, + type: str | None = None, ): registration_identifier = "{}:{}({}){}".format( constant_for_domain, operator, start_version, f"<{type}>" if type else "" @@ -202,8 +201,8 @@ def _generate_provider_registrations( ort_root: Path, build_dir: Path, use_cuda: bool, - required_ops: typing.Optional[dict], - op_type_impl_filter: typing.Optional[OpTypeImplFilterInterface], + required_ops: dict | None, + op_type_impl_filter: OpTypeImplFilterInterface | None, ): """Generate provider registration files.""" kernel_registration_files = [ diff --git a/tools/ci_build/replace_urls_in_deps.py b/tools/ci_build/replace_urls_in_deps.py index 37dad358a6feb..2569b20fb44a5 100644 --- a/tools/ci_build/replace_urls_in_deps.py +++ b/tools/ci_build/replace_urls_in_deps.py @@ -4,6 +4,7 @@ # This file replaces https URLs in deps.txt to local file paths. 
It runs after we download the dependencies from Azure # DevOps Artifacts +from __future__ import annotations import argparse import csv diff --git a/tools/ci_build/requirements/pybind/requirements.txt b/tools/ci_build/requirements/pybind/requirements.txt index 8f00a25627c21..0a8caecd11e4e 100644 --- a/tools/ci_build/requirements/pybind/requirements.txt +++ b/tools/ci_build/requirements/pybind/requirements.txt @@ -6,3 +6,4 @@ sympy>=1.10 packaging cerberus psutil +flatbuffers diff --git a/tools/ci_build/set-trigger-rules.py b/tools/ci_build/set-trigger-rules.py index b46d1e2559e46..78f59452d1284 100644 --- a/tools/ci_build/set-trigger-rules.py +++ b/tools/ci_build/set-trigger-rules.py @@ -4,7 +4,7 @@ # -------------------------------------------------------------------------- # This script is used to add trigger rules to the workflow files. - +from __future__ import annotations import multiprocessing import os @@ -71,12 +71,11 @@ def main(): os.chdir(working_dir) trigger_rules = {"skip-docs.yml": skip_doc_changes, "skip-js.yml": skip_js_changes} - for key in trigger_rules: + for key, skip_changes in trigger_rules.items(): trigger_file = os.path.join(working_dir, "triggers", key) with open(trigger_file) as f1: trigger_lines = f1.readlines() - skip_changes = trigger_rules[key] pool = multiprocessing.Pool() pool.starmap(add_trigger_filter, [(file, trigger_lines) for file in skip_changes]) pool.close() diff --git a/tools/ci_build/update_tsaoptions.py b/tools/ci_build/update_tsaoptions.py index 07be746aa1981..394a45cc4ee3b 100644 --- a/tools/ci_build/update_tsaoptions.py +++ b/tools/ci_build/update_tsaoptions.py @@ -1,5 +1,6 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. +from __future__ import annotations import json import os diff --git a/tools/ci_build/upload_python_package_to_azure_storage.py b/tools/ci_build/upload_python_package_to_azure_storage.py index 16ff5d1f71611..c90ec1aa92b6b 100755 --- a/tools/ci_build/upload_python_package_to_azure_storage.py +++ b/tools/ci_build/upload_python_package_to_azure_storage.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. 
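
Stepping back to the hipify-perl kernel-launch rules earlier in this diff: they are easier to follow as a toy re-implementation. The sketch below is illustrative only (names like `hipify_launch` and `vec_add` are invented); it handles just the plain `kern<<<grid, block>>>(args)` form, while the Perl rules also cover templated kernels, nested parentheses in the launch config, and the three- and four-argument `<<<...>>>` variants.

```python
import re

# Matches `kern<<<grid, block>>>(` and rewrites it to the HIP launch macro.
_LAUNCH = re.compile(r"(\w+)\s*<<<\s*([^,]+?)\s*,\s*(.+?)\s*>>>\s*\(")


def hipify_launch(line: str) -> str:
    # hipLaunchKernelGGL(kernel, grid, block, sharedMem, stream, args...);
    # the two zeros supply the shared-memory size and stream that the
    # CUDA syntax left implicit.
    return _LAUNCH.sub(r"hipLaunchKernelGGL(\1, \2, \3, 0, 0, ", line)


print(hipify_launch("vec_add<<<blocks, threads>>>(a, b, n);"))
# -> hipLaunchKernelGGL(vec_add, blocks, threads, 0, 0, a, b, n);
```
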
+from __future__ import annotations import argparse import logging diff --git a/tools/nuget/generate_nuspec_for_native_nuget.py b/tools/nuget/generate_nuspec_for_native_nuget.py index 11842f34ce45b..8ccb2c054900e 100644 --- a/tools/nuget/generate_nuspec_for_native_nuget.py +++ b/tools/nuget/generate_nuspec_for_native_nuget.py @@ -382,6 +382,7 @@ def generate_files(line_list, args): "tensorrt_ep_shared_lib": "onnxruntime_providers_tensorrt.dll", "openvino_ep_shared_lib": "onnxruntime_providers_openvino.dll", "cuda_ep_shared_lib": "onnxruntime_providers_cuda.dll", + "qnn_ep_shared_lib": "onnxruntime_providers_qnn.dll", "onnxruntime_perf_test": "onnxruntime_perf_test.exe", "onnx_test_runner": "onnx_test_runner.exe", } @@ -698,7 +699,7 @@ def generate_files(line_list, args): + '\\native" />' ) - if args.execution_provider == "rocm" or is_rocm_gpu_package and not is_ado_packaging_build: + if args.execution_provider == "rocm" or (is_rocm_gpu_package and not is_ado_packaging_build): files_list.append( "' ) - if args.execution_provider == "cuda" or is_cuda_gpu_win_sub_package and not is_ado_packaging_build: + if args.execution_provider == "cuda" or (is_cuda_gpu_win_sub_package and not is_ado_packaging_build): files_list.append( "' ) + if args.execution_provider == "qnn" or (is_qnn_package and not is_ado_packaging_build): + files_list.append( + "' + ) + files_list.append( + "' + ) + # process all other library dependencies if is_cpu_package or is_cuda_gpu_package or is_dml_package or is_mklml_package: # Process dnnl dependency diff --git a/tools/nuget/validate_package.py b/tools/nuget/validate_package.py index 1079b64c9a90f..961109c595ed5 100644 --- a/tools/nuget/validate_package.py +++ b/tools/nuget/validate_package.py @@ -106,7 +106,7 @@ def check_if_dlls_are_present( ): platforms = platforms_supported.strip().split(",") if package_type == "tarball": - file_list_in_package = list() + file_list_in_package = [] for dirpath, _dirnames, filenames in os.walk(package_path): file_list_in_package += [os.path.join(dirpath, file) for file in filenames] else: diff --git a/tools/python/dump_ort_model.py b/tools/python/dump_ort_model.py index b9e3bfa0d3bcd..9d7e23bf3ad3a 100644 --- a/tools/python/dump_ort_model.py +++ b/tools/python/dump_ort_model.py @@ -80,7 +80,7 @@ def _dump_node(self, node: fbs.Node): outputs = [node.Outputs(i).decode() for i in range(node.OutputsLength())] print( f"{node.Index()}:{node.Name().decode()}({domain}:{optype}:{since_version}) " - f'inputs=[{",".join(inputs)}] outputs=[{",".join(outputs)}]' + f"inputs=[{','.join(inputs)}] outputs=[{','.join(outputs)}]" ) def _dump_graph(self, graph: fbs.Graph): diff --git a/tools/python/find_optimizer_opset_version_updates_required.py b/tools/python/find_optimizer_opset_version_updates_required.py index b46f7e4a54d9c..3c7d94b8ba038 100644 --- a/tools/python/find_optimizer_opset_version_updates_required.py +++ b/tools/python/find_optimizer_opset_version_updates_required.py @@ -7,7 +7,6 @@ import logging import os import re -import typing logging.basicConfig(format="[%(levelname)s] - %(message)s", level=logging.DEBUG) log = logging.getLogger() @@ -30,7 +29,7 @@ def parse_args(): return args -def get_call_args_from_file(filename: str, function_or_declaration: str) -> typing.List[str]: +def get_call_args_from_file(filename: str, function_or_declaration: str) -> list[str]: """ Search a file for all function calls or declarations that match the provided name. Requires both the opening '(' and closing ')' to be on the same line. 
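
The added parentheses in the generate_nuspec_for_native_nuget.py hunks above are clarifying rather than behavior-changing, because `and` already binds tighter than `or` in Python. A throwaway check with made-up booleans:

```python
# `a or b and c` has always grouped as `a or (b and c)`.
a, b, c = True, False, False
assert (a or b and c) == (a or (b and c))  # both True
assert (a or b and c) != ((a or b) and c)  # the other grouping is False
```
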
@@ -63,7 +62,7 @@ def get_call_args_from_file(filename: str, function_or_declaration: str) -> typi return results -def get_multiline_call_args_from_file(filename: str, function_or_declaration: str) -> typing.List[str]: +def get_multiline_call_args_from_file(filename: str, function_or_declaration: str) -> list[str]: """ Search a file for all function calls or declarations that match the provided name. Allows the opening '(' and closing ')' to be split across multiple lines. @@ -96,7 +95,7 @@ def get_multiline_call_args_from_file(filename: str, function_or_declaration: st return results -def _add_if_newer(domain: str, op: str, opset: int, op_to_opset: typing.Dict[str, int]): +def _add_if_newer(domain: str, op: str, opset: int, op_to_opset: dict[str, int]): key = domain + "." + op if key not in op_to_opset or op_to_opset[key] < opset: op_to_opset[key] = opset diff --git a/tools/python/gen_contrib_doc.py b/tools/python/gen_contrib_doc.py index ab9421b395326..d160a004e36a1 100644 --- a/tools/python/gen_contrib_doc.py +++ b/tools/python/gen_contrib_doc.py @@ -8,10 +8,8 @@ import pathlib import sys from collections import defaultdict -from typing import Any, Dict, List, Sequence, Set, Text, Tuple # noqa: F401 -import numpy as np # type: ignore -from onnx import AttributeProto, FunctionProto # noqa: F401 +import numpy as np import onnxruntime.capi.onnxruntime_pybind11_state as rtpy from onnxruntime.capi.onnxruntime_pybind11_state import schemadef # noqa: F401 @@ -305,11 +303,6 @@ def support_level_str(level): # type: (OpSchema.SupportType) -> Text return "experimental " if level == OpSchema.SupportType.EXPERIMENTAL else "" -# def function_status_str(status=OperatorStatus.Value("EXPERIMENTAL")): # type: ignore -# return \ -# "experimental " if status == OperatorStatus.Value('EXPERIMENTAL') else "" # type: ignore - - def main(output_path: str, domain_filter: [str]): with open(output_path, "w", newline="", encoding="utf-8") as fout: fout.write("## Contrib Operator Schemas\n") @@ -320,9 +313,7 @@ def main(output_path: str, domain_filter: [str]): ) # domain -> support level -> name -> [schema] - index = defaultdict( - lambda: defaultdict(lambda: defaultdict(list)) - ) # type: Dict[Text, Dict[int, Dict[Text, List[OpSchema]]]] + index = defaultdict(lambda: defaultdict(lambda: defaultdict(list))) # type: Dict[Text, Dict[int, Dict[Text, List[OpSchema]]]] for schema in rtpy.get_all_operator_schema(): index[schema.domain][int(schema.support_level)][schema.name].append(schema) @@ -331,17 +322,15 @@ def main(output_path: str, domain_filter: [str]): # Preprocess the Operator Schemas # [(domain, [(support_level, [(schema name, current schema, all versions schemas)])])] - operator_schemas = ( - list() - ) # type: List[Tuple[Text, List[Tuple[int, List[Tuple[Text, OpSchema, List[OpSchema]]]]]]] + operator_schemas = [] # type: List[Tuple[Text, List[Tuple[int, List[Tuple[Text, OpSchema, List[OpSchema]]]]]]] exsting_ops = set() # type: Set[Text] for domain, _supportmap in sorted(index.items()): if not should_render_domain(domain, domain_filter): continue - processed_supportmap = list() + processed_supportmap = [] for _support, _namemap in sorted(_supportmap.items()): - processed_namemap = list() + processed_namemap = [] for n, unsorted_versions in sorted(_namemap.items()): versions = sorted(unsorted_versions, key=lambda s: s.since_version) schema = versions[-1] @@ -394,7 +383,7 @@ def main(output_path: str, domain_filter: [str]): parser.add_argument( "--domains", nargs="+", - help="Filter to specified domains. " "e.g. 
`--domains com.microsoft com.microsoft.nchwc`", # noqa: ISC001 + help="Filter to specified domains. e.g. `--domains com.microsoft com.microsoft.nchwc`", ) parser.add_argument( "--output_path", diff --git a/tools/python/onnx2tfevents.py b/tools/python/onnx2tfevents.py index 9dfde13090b07..909bc04817ff1 100644 --- a/tools/python/onnx2tfevents.py +++ b/tools/python/onnx2tfevents.py @@ -13,7 +13,7 @@ import inspect import itertools from abc import ABC, abstractmethod -from typing import Callable, List +from collections.abc import Callable import numpy as np import onnx @@ -203,7 +203,7 @@ def _add_sections(self, name: str) -> None: if len(sec) > 0: self.sections.add(sec) - def _get_sections(self, curr_name: str, sections: List[str]) -> None: + def _get_sections(self, curr_name: str, sections: list[str]) -> None: for section in self.sections: if curr_name.startswith(section) and (len(curr_name) == len(section) or curr_name[len(section)] == "."): sections.append(section) @@ -217,8 +217,7 @@ def _transform_name(self, name: str) -> str: if "/" in name: if name.startswith(f"/{self.original_module_name}/"): name = name[len(self.original_module_name) + 2 :] - if name.startswith("/"): - name = name[1:] + name = name.removeprefix("/") return name sections = [] diff --git a/tools/python/ort_test_dir_utils.py b/tools/python/ort_test_dir_utils.py index 3af407b2aeee6..59bb6670c8794 100644 --- a/tools/python/ort_test_dir_utils.py +++ b/tools/python/ort_test_dir_utils.py @@ -159,7 +159,7 @@ def save_data(prefix, name_data_map, model_info): sess = ort.InferenceSession(test_model_filename, so) outputs = sess.run(output_names, name_input_map) name_output_map = {} - for name, data in zip(output_names, outputs): + for name, data in zip(output_names, outputs, strict=False): name_output_map[name] = data save_data("output", name_output_map, model_outputs) diff --git a/tools/python/run_CIs_for_branch.py b/tools/python/run_CIs_for_branch.py index 975ea2b988d75..b8d9b9d9d5f72 100644 --- a/tools/python/run_CIs_for_branch.py +++ b/tools/python/run_CIs_for_branch.py @@ -1,13 +1,13 @@ #!/usr/bin/env python3 # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. +from __future__ import annotations import argparse import json import os import subprocess import sys -import typing from run_CIs_for_external_pr import get_pipeline_names from util.platform_helpers import is_windows @@ -78,7 +78,7 @@ def _parse_args(): return args -def _run_az_pipelines_command(command: typing.List[str]): +def _run_az_pipelines_command(command: list[str]): try: az = "az.cmd" if is_windows() else "az" az_output = subprocess.run([az, "pipelines", *command], capture_output=True, text=True, check=True) diff --git a/tools/python/run_CIs_for_external_pr.py b/tools/python/run_CIs_for_external_pr.py index 228c8016170d9..cee32073fa473 100644 --- a/tools/python/run_CIs_for_external_pr.py +++ b/tools/python/run_CIs_for_external_pr.py @@ -1,13 +1,13 @@ #!/usr/bin/env python3 # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. 
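
The `str.removeprefix` swap in the onnx2tfevents.py hunk above is the Python 3.9+ idiom for the startswith/slice pattern it replaces. Unlike `lstrip`, it removes at most one exact occurrence and never treats its argument as a character set:

```python
name = "/module/layer1/conv"
assert name.removeprefix("/") == "module/layer1/conv"
assert "unrelated".removeprefix("/") == "unrelated"  # no-op when prefix absent

# The trap the old hand-written slicing avoided, and removeprefix also avoids:
assert "mmodule".lstrip("m") == "odule"         # lstrip strips every leading 'm'
assert "mmodule".removeprefix("m") == "module"  # removeprefix strips exactly one
```
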
+from __future__ import annotations import argparse import json import os import subprocess import sys -import typing def get_pipeline_names(): @@ -72,7 +72,7 @@ def _parse_args(): return args -def run_gh_pr_command(command: typing.List[str], check: bool = True): +def run_gh_pr_command(command: list[str], check: bool = True): try: return subprocess.run(["gh", "pr", *command], capture_output=True, text=True, check=check) except subprocess.CalledProcessError as cpe: diff --git a/tools/python/run_adb.py b/tools/python/run_adb.py index 7506a8699df05..aefdb2344d050 100755 --- a/tools/python/run_adb.py +++ b/tools/python/run_adb.py @@ -1,17 +1,17 @@ #!/usr/bin/env python3 # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. +from __future__ import annotations import logging import os import sys -import typing from util import run from util.android import get_sdk_tool_paths -def run_adb(android_sdk_root: str, args: typing.List[str]): +def run_adb(android_sdk_root: str, args: list[str]): sdk_tool_paths = get_sdk_tool_paths(android_sdk_root) run(sdk_tool_paths.adb, *args) diff --git a/tools/python/run_android_emulator.py b/tools/python/run_android_emulator.py index 2826921726556..6d7c29fc58296 100755 --- a/tools/python/run_android_emulator.py +++ b/tools/python/run_android_emulator.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. +from __future__ import annotations import argparse import contextlib diff --git a/tools/python/sparsify_initializers.py b/tools/python/sparsify_initializers.py index f9cc8db38ecff..14f2e0b62c069 100644 --- a/tools/python/sparsify_initializers.py +++ b/tools/python/sparsify_initializers.py @@ -5,15 +5,15 @@ # This script opens an existing model in onnx format and attempts to # move initializers from model.graph.initializer field to model.graph.sparse_initializer field # and convert them into ONNX COO flat index format. +from __future__ import annotations import argparse import logging import sys -from typing import List, Tuple # noqa: F401 import numpy as np import onnx -from onnx import ModelProto, SparseTensorProto, TensorProto, numpy_helper # noqa: F401 +from onnx import ModelProto, TensorProto, numpy_helper logger = logging.getLogger(__name__) @@ -54,9 +54,7 @@ def setup_logging(verbose): # type: (bool) -> None logger.setLevel(logging_level) -def convert_tensor_to_sparse( - tensor, sparsity_threshold, tolerance -): # type: (TensorProto, float, float) -> Tuple[SparseTensorProto, float] +def convert_tensor_to_sparse(tensor, sparsity_threshold, tolerance): # type: (TensorProto, float, float) -> Tuple[SparseTensorProto, float] """returns a tuple of sparse_tensor and sparsity level""" values = [] indices = [] @@ -140,9 +138,7 @@ def convert_tensor_to_sparse( return (sparse_tensor, sparsity) -def convert_initializers( - model, exclude_names, sparsity_threshold, tolerance -): # type: (ModelProto, List[str], float, float) -> None +def convert_initializers(model, exclude_names, sparsity_threshold, tolerance): # type: (ModelProto, List[str], float, float) -> None graph = model.graph converted_sparse = [] remaining_initializers = [] diff --git a/tools/python/util/android/android.py b/tools/python/util/android/android.py index 24004d6be761d..8f3ed97cae53f 100644 --- a/tools/python/util/android/android.py +++ b/tools/python/util/android/android.py @@ -1,5 +1,6 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. 
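
Similarly, the `strict=False` added to `zip` in the ort_test_dir_utils.py hunk above keeps the old truncating behavior but makes it explicit; with Python 3.10's `strict=True`, a mismatch between output names and outputs would raise instead of being silently dropped. The toy lists below are invented:

```python
names = ["out0", "out1"]
values = [1.0]  # one output went missing

# strict=False: the extra name is silently dropped, as before Python 3.10.
assert dict(zip(names, values, strict=False)) == {"out0": 1.0}

# strict=True: the mismatch becomes a hard error.
try:
    dict(zip(names, values, strict=True))
except ValueError as exc:
    print(exc)  # e.g. "zip() argument 2 is shorter than argument 1"
```
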
+from __future__ import annotations import collections import contextlib @@ -108,7 +109,7 @@ def _stop_process_with_pid(pid: int): def start_emulator( sdk_tool_paths: SdkToolPaths, avd_name: str, - extra_args: typing.Optional[typing.Sequence[str]] = None, + extra_args: typing.Sequence[str] | None = None, timeout_minutes: int = 20, ) -> subprocess.Popen: if check_emulator_running_using_avd_name(avd_name=avd_name): @@ -326,7 +327,7 @@ def stop_emulator_by_pid(emulator_pid: int, timeout_seconds: int = 120): _log.info("Emulator stopped successfully.") -def stop_emulator(emulator_proc_or_pid: typing.Union[subprocess.Popen, int], timeout_seconds: int = 120): +def stop_emulator(emulator_proc_or_pid: subprocess.Popen | int, timeout_seconds: int = 120): """ Stops the emulator process, checking its running status before and after stopping. :param emulator_proc_or_pid: The emulator process (subprocess.Popen) or PID (int). diff --git a/tools/python/util/file_utils.py b/tools/python/util/file_utils.py index 0373ac171144f..4036841cbfd34 100644 --- a/tools/python/util/file_utils.py +++ b/tools/python/util/file_utils.py @@ -1,12 +1,13 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. +from __future__ import annotations import os import pathlib import typing -def path_match_suffix_ignore_case(path: typing.Union[pathlib.Path, str], suffix: str) -> bool: +def path_match_suffix_ignore_case(path: pathlib.Path | str, suffix: str) -> bool: """ Returns whether `path` ends in `suffix`, ignoring case. """ @@ -16,8 +17,8 @@ def path_match_suffix_ignore_case(path: typing.Union[pathlib.Path, str], suffix: def files_from_file_or_dir( - file_or_dir_path: typing.Union[pathlib.Path, str], predicate: typing.Callable[[pathlib.Path], bool] = lambda _: True -) -> typing.List[pathlib.Path]: + file_or_dir_path: pathlib.Path | str, predicate: typing.Callable[[pathlib.Path], bool] = lambda _: True +) -> list[pathlib.Path]: """ Gets the files in `file_or_dir_path` satisfying `predicate`. If `file_or_dir_path` is a file, the single file is considered. Otherwise, all files in the directory are diff --git a/tools/python/util/get_azcopy.py b/tools/python/util/get_azcopy.py index bfcf228a956eb..32ad367b2a010 100644 --- a/tools/python/util/get_azcopy.py +++ b/tools/python/util/get_azcopy.py @@ -1,5 +1,6 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. +from __future__ import annotations import contextlib import logging diff --git a/tools/python/util/make_dynamic_shape_fixed.py b/tools/python/util/make_dynamic_shape_fixed.py index f4e09a8cc04a3..0d943cecb0cdf 100644 --- a/tools/python/util/make_dynamic_shape_fixed.py +++ b/tools/python/util/make_dynamic_shape_fixed.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. 
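
The file_utils.py hunk above only re-types `path_match_suffix_ignore_case`; its body is not part of the diff. A plausible implementation matching the docstring (a guess for illustration, not the repository's actual code):

```python
from __future__ import annotations

import pathlib


def path_match_suffix_ignore_case(path: pathlib.Path | str, suffix: str) -> bool:
    # Hypothetical body: compare case-folded text, per the docstring above.
    return str(path).lower().endswith(suffix.lower())


assert path_match_suffix_ignore_case(pathlib.Path("model.ONNX"), ".onnx")
assert not path_match_suffix_ignore_case("weights.bin", ".onnx")
```
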
+from __future__ import annotations import argparse import os @@ -49,7 +50,7 @@ def make_dynamic_shape_fixed_helper(): (args.dim_param and args.input_name) or (not args.dim_param and not args.input_name) or (args.dim_param and (not args.dim_value or args.dim_value < 1)) - or (args.input_name and (not args.input_shape or any([value < 1 for value in args.input_shape]))) + or (args.input_name and (not args.input_shape or any(value < 1 for value in args.input_shape))) ): print("Invalid usage.") parser.print_help() diff --git a/tools/python/util/mobile_helpers/test/test_usability_checker.py b/tools/python/util/mobile_helpers/test/test_usability_checker.py index 2deacfc91dd1c..7fde729aa0053 100644 --- a/tools/python/util/mobile_helpers/test/test_usability_checker.py +++ b/tools/python/util/mobile_helpers/test/test_usability_checker.py @@ -1,5 +1,6 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. +from __future__ import annotations import logging import pathlib diff --git a/tools/python/util/mobile_helpers/usability_checker.py b/tools/python/util/mobile_helpers/usability_checker.py index e7948c43baa49..81c3c07aa9f6c 100644 --- a/tools/python/util/mobile_helpers/usability_checker.py +++ b/tools/python/util/mobile_helpers/usability_checker.py @@ -151,23 +151,23 @@ def print_analysis(self, logger: logging.Logger, ep_name: str): if self.supported_groups: logger.info( - f'\tPartition sizes: [{", ".join([str(len(partition)) for partition in self.supported_groups])}]' + f"\tPartition sizes: [{', '.join([str(len(partition)) for partition in self.supported_groups])}]" ) # dump full groups if debug output is enabled for group in self.supported_groups: - logger.debug(f'Nodes in group: {",".join([f"{node.op_type}:{node.name}" for node in group])}') + logger.debug(f"Nodes in group: {','.join([f'{node.op_type}:{node.name}' for node in group])}") logger.info(f"Unsupported nodes due to operator={self.nodes_unsupported_due_to_op}") if self.unsupported_ops: - logger.info(f'\tUnsupported ops: {",".join(sorted(self.unsupported_ops))}') + logger.info(f"\tUnsupported ops: {','.join(sorted(self.unsupported_ops))}") caveats = self.supported_ops_checker.get_caveats() if caveats: indent = " " * 5 logger.info( "\tCaveats that have not been checked and may result in a node not actually being supported: " - f'{"".join([os.linesep + indent + caveat for caveat in caveats])}' + f"{''.join([os.linesep + indent + caveat for caveat in caveats])}" ) if self.nodes_unsupported_due_to_dynamic_input: @@ -341,7 +341,7 @@ def close_group(): continue if not is_op_supported: - unsupported_ops.add(f'{node.domain if node.domain else "ai.onnx"}:{node.op_type}') + unsupported_ops.add(f"{node.domain if node.domain else 'ai.onnx'}:{node.op_type}") num_unsupported_nodes_due_to_op += 1 if not is_input_shape_supported: @@ -349,7 +349,7 @@ def close_group(): if not is_rank_supported: num_unsupported_nodes_due_to_rank += 1 - ops_with_unsupported_rank.add(f'{node.domain if node.domain else "ai.onnx"}:{node.op_type}') + ops_with_unsupported_rank.add(f"{node.domain if node.domain else 'ai.onnx'}:{node.op_type}") if is_node_supported: num_supported_nodes += 1 @@ -569,8 +569,7 @@ def check_shapes(graph: onnx.GraphProto, logger: logging.Logger | None = None): # a model where all inputs are dynamic (results in no value_info) if not graph.value_info and not (len(graph.node) == 1 or len(dynamic_inputs) == len(graph.input)): logger.warning( - "Unable to check shapes within model. 
" - "ONNX shape inferencing should be run on the model prior to checking." + "Unable to check shapes within model. ONNX shape inferencing should be run on the model prior to checking." ) for vi in graph.value_info: diff --git a/tools/python/util/onnx_model_utils.py b/tools/python/util/onnx_model_utils.py index 5c970430a3a82..298e2153a9688 100644 --- a/tools/python/util/onnx_model_utils.py +++ b/tools/python/util/onnx_model_utils.py @@ -1,9 +1,9 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. +from __future__ import annotations import logging import pathlib -from typing import Optional import onnx from onnx import version_converter @@ -62,8 +62,8 @@ def get_opsets_imported(model: onnx.ModelProto): def update_onnx_opset( model_path: pathlib.Path, opset: int, - out_path: Optional[pathlib.Path] = None, - logger: Optional[logging.Logger] = None, + out_path: pathlib.Path | None = None, + logger: logging.Logger | None = None, ): """ Helper to update the opset of a model using onnx version_converter. Target opset must be greater than current opset. @@ -227,7 +227,7 @@ def make_input_shape_fixed(graph: onnx.GraphProto, input_name: str, fixed_shape: raise ValueError( f"Input {input_name} was not found in graph inputs. " - f'Valid input names are: {",".join([i.name for i in graph.input])}' + f"Valid input names are: {','.join([i.name for i in graph.input])}" ) @@ -284,7 +284,7 @@ def is_local_value(value): return value in producers or value in initializers or value in graph_inputs for node in graph.node: - inputs = [i for i in node.input] + inputs = list(node.input) for attr in node.attribute: if attr.HasField("g"): @@ -337,7 +337,7 @@ def get_producer_consumer_maps(graph: onnx.GraphProto): # top level graph should have no implicit inputs if implicit_inputs: raise ValueError( - f'This appears to be an invalid model with missing inputs of {",".join(sorted(implicit_inputs))}' + f"This appears to be an invalid model with missing inputs of {','.join(sorted(implicit_inputs))}" ) return node_to_producers, node_to_consumers diff --git a/tools/python/util/optimize_onnx_model.py b/tools/python/util/optimize_onnx_model.py index b7ebb54b9c8fa..c5459b2d9ff9a 100644 --- a/tools/python/util/optimize_onnx_model.py +++ b/tools/python/util/optimize_onnx_model.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. 
+from __future__ import annotations import argparse import os diff --git a/tools/python/util/ort_format_model/__init__.py b/tools/python/util/ort_format_model/__init__.py index 318851642d6e5..29e8e70ed26e3 100644 --- a/tools/python/util/ort_format_model/__init__.py +++ b/tools/python/util/ort_format_model/__init__.py @@ -18,8 +18,10 @@ sys.path.append(ort_fbs_py_parent_dir) -from .operator_type_usage_processors import GloballyAllowedTypesOpTypeImplFilter # noqa: E402, F401 -from .operator_type_usage_processors import OperatorTypeUsageManager # noqa: E402, F401 -from .operator_type_usage_processors import OpTypeImplFilterInterface # noqa: E402, F401 +from .operator_type_usage_processors import ( # noqa: E402 + GloballyAllowedTypesOpTypeImplFilter, # noqa: F401 + OperatorTypeUsageManager, # noqa: F401 + OpTypeImplFilterInterface, # noqa: F401 +) from .ort_model_processor import OrtFormatModelProcessor # noqa: E402, F401 from .utils import create_config_from_models # noqa: E402, F401 diff --git a/tools/python/util/ort_format_model/operator_type_usage_processors.py b/tools/python/util/ort_format_model/operator_type_usage_processors.py index 598549c42b60a..53f7a34015060 100644 --- a/tools/python/util/ort_format_model/operator_type_usage_processors.py +++ b/tools/python/util/ort_format_model/operator_type_usage_processors.py @@ -1,8 +1,8 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. +from __future__ import annotations import json -import typing from abc import ABC, abstractmethod import ort_flatbuffers_py.fbs as fbs @@ -65,9 +65,7 @@ def __init__(self, domain: str, optype: str): def process_node(self, node: fbs.Node, value_name_to_typeinfo: dict): pass - def is_typed_registration_needed( - self, type_in_registration: str, globally_allowed_types: typing.Optional[typing.Set[str]] - ): + def is_typed_registration_needed(self, type_in_registration: str, globally_allowed_types: set[str] | None): """ Given the string from a kernel registration, determine if the registration is required or not. :param type_in_registration: Type string from kernel registration @@ -113,8 +111,8 @@ def __init__( optype: str, inputs: [int] = [0], # noqa: B006 outputs: [int] = [], # noqa: B006 - required_input_types: typing.Dict[int, typing.Set[str]] = {}, # noqa: B006 - required_output_types: typing.Dict[int, typing.Set[str]] = {}, # noqa: B006 + required_input_types: dict[int, set[str]] = {}, # noqa: B006 + required_output_types: dict[int, set[str]] = {}, # noqa: B006 ): """ Create DefaultTypeUsageProcessor. Types for one or more inputs and/or outputs can be tracked by the processor. @@ -186,9 +184,7 @@ def process_node(self, node: fbs.Node, value_name_to_typeinfo: dict): type_str = value_name_to_typestr(node.Outputs(o), value_name_to_typeinfo) self._output_types[o].add(type_str) - def is_typed_registration_needed( - self, type_in_registration: str, globally_allowed_types: typing.Optional[typing.Set[str]] - ): + def is_typed_registration_needed(self, type_in_registration: str, globally_allowed_types: set[str] | None): if 0 not in self._input_types: # currently all standard typed registrations are for input 0. # custom registrations can be handled by operator specific processors (e.g. OneHotProcessor below). @@ -262,9 +258,7 @@ def __init__(self, domain: str, optype: str): # init with tracking of input 1 only. 
super().__init__(domain, optype, inputs=[1], outputs=[]) - def is_typed_registration_needed( - self, type_in_registration: str, globally_allowed_types: typing.Optional[typing.Set[str]] - ): + def is_typed_registration_needed(self, type_in_registration: str, globally_allowed_types: set[str] | None): return self.is_input_type_enabled(type_in_registration, 1, globally_allowed_types) @@ -277,9 +271,7 @@ def __init__(self, domain: str, optype: str): # init with tracking of output 0 only. super().__init__(domain, optype, inputs=[], outputs=[0]) - def is_typed_registration_needed( - self, type_in_registration: str, globally_allowed_types: typing.Optional[typing.Set[str]] - ): + def is_typed_registration_needed(self, type_in_registration: str, globally_allowed_types: set[str] | None): return self.is_output_type_enabled(type_in_registration, 0, globally_allowed_types) @@ -301,9 +293,7 @@ def process_node(self, node: fbs.Node, value_name_to_typeinfo: dict): key = (type0, type2, type1) self._triples.add(key) - def is_typed_registration_needed( - self, type_in_registration: str, globally_allowed_types: typing.Optional[typing.Set[str]] - ): + def is_typed_registration_needed(self, type_in_registration: str, globally_allowed_types: set[str] | None): # the OneHot registration involves a concatenation of the 3 types involved reg_types = tuple([_reg_type_to_cpp_type(reg_type) for reg_type in _split_reg_types(type_in_registration)]) if globally_allowed_types is not None: @@ -633,7 +623,7 @@ class GloballyAllowedTypesOpTypeImplFilter(OpTypeImplFilterInterface): _valid_allowed_types = set(FbsTypeInfo.tensordatatype_to_string.values()) # noqa: RUF012 - def __init__(self, globally_allowed_types: typing.Set[str]): + def __init__(self, globally_allowed_types: set[str]): self._operator_processors = _create_operator_type_usage_processors() if not globally_allowed_types.issubset(self._valid_allowed_types): diff --git a/tools/python/util/ort_format_model/types.py b/tools/python/util/ort_format_model/types.py index ffeda6b2e7607..9661eb33c9279 100644 --- a/tools/python/util/ort_format_model/types.py +++ b/tools/python/util/ort_format_model/types.py @@ -6,6 +6,7 @@ class FbsTypeInfo: "Class to provide conversion between ORT flatbuffers schema values and C++ types" + tensordatatype_to_string = { # noqa: RUF012 fbs.TensorDataType.TensorDataType.FLOAT: "float", fbs.TensorDataType.TensorDataType.UINT8: "uint8_t", diff --git a/tools/python/util/reduced_build_config_parser.py b/tools/python/util/reduced_build_config_parser.py index be39562e2d60d..0afcca2388f10 100644 --- a/tools/python/util/reduced_build_config_parser.py +++ b/tools/python/util/reduced_build_config_parser.py @@ -1,5 +1,6 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. +from __future__ import annotations import os diff --git a/tools/python/util/run.py b/tools/python/util/run.py index 838db8f789eac..b1ebd044f3420 100644 --- a/tools/python/util/run.py +++ b/tools/python/util/run.py @@ -1,5 +1,6 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. 
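
The `# noqa: B006` markers retained on `inputs: [int] = [0]` above silence flake8-bugbear's mutable-default-argument rule rather than fix it. In brief, the hazard that rule guards against, and the usual `None`-sentinel alternative:

```python
def bad(item, acc=[]):  # the default list is created once and shared by all calls
    acc.append(item)
    return acc


def good(item, acc=None):  # None sentinel: a fresh list per call
    if acc is None:
        acc = []
    acc.append(item)
    return acc


assert bad(1) == [1]
assert bad(2) == [1, 2]  # state leaked from the previous call
assert good(1) == [1]
assert good(2) == [2]
```
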
+from __future__ import annotations import logging import os diff --git a/winml/lib/Api.Ort/OnnxruntimeEngine.h b/winml/lib/Api.Ort/OnnxruntimeEngine.h index 88945b75c75e4..93c097c7deaca 100644 --- a/winml/lib/Api.Ort/OnnxruntimeEngine.h +++ b/winml/lib/Api.Ort/OnnxruntimeEngine.h @@ -91,8 +91,8 @@ class OnnxruntimeEngine STDMETHOD(CreateTensorValueFromExternalD3DResource) (ID3D12Resource* resource, const int64_t* shape, size_t count, winml::TensorKind kind, _Out_ IValue** out) override; STDMETHOD(CreateTensorValueFromExternalBuffer) - (void* data, size_t size_in_bytes, const int64_t* shape, size_t count, winml::TensorKind kind, _Out_ IValue** out - ) override; + (void* data, size_t size_in_bytes, const int64_t* shape, size_t count, winml::TensorKind kind, _Out_ IValue** out) + override; STDMETHOD(CreateStringTensorValueFromDataWithCopy) (const char* const* data, size_t num_elements, const int64_t* shape, size_t count, _Out_ IValue** out) override; STDMETHOD(CreateNullValue) diff --git a/winml/lib/Api/LearningModelSession.cpp b/winml/lib/Api/LearningModelSession.cpp index 57bafda57fe54..508b77e964b4c 100644 --- a/winml/lib/Api/LearningModelSession.cpp +++ b/winml/lib/Api/LearningModelSession.cpp @@ -37,13 +37,15 @@ LearningModelSession::LearningModelSession(_winml::IEngine* engine) } LearningModelSession::LearningModelSession(winml::LearningModel const& model) try - : LearningModelSession(model, make(LearningModelDeviceKind::Default)) {} + : LearningModelSession(model, make(LearningModelDeviceKind::Default)) { +} WINML_CATCH_ALL LearningModelSession::LearningModelSession( winml::LearningModel const& model, winml::LearningModelDevice const& deviceToRunOn ) try - : LearningModelSession(model, deviceToRunOn, nullptr) {} + : LearningModelSession(model, deviceToRunOn, nullptr) { +} WINML_CATCH_ALL LearningModelSession::LearningModelSession( diff --git a/winml/test/api/raw/buffer_backed_random_access_stream_reference.h b/winml/test/api/raw/buffer_backed_random_access_stream_reference.h index 6f492bf8340c9..32ab8de12f650 100644 --- a/winml/test/api/raw/buffer_backed_random_access_stream_reference.h +++ b/winml/test/api/raw/buffer_backed_random_access_stream_reference.h @@ -116,9 +116,9 @@ struct RandomAccessStream ABI::Windows::Storage::Streams::IInputStream, ABI::Windows::Storage::Streams::IOutputStream, ABI::Windows::Foundation::IClosable> { - InspectableClass(L"WinMLTest.RandomAccessStream", BaseTrust) + InspectableClass(L"WinMLTest.RandomAccessStream", BaseTrust) - private : Microsoft::WRL::ComPtr buffer_ = nullptr; + private : Microsoft::WRL::ComPtr buffer_ = nullptr; UINT64 position_ = 0; public: @@ -266,8 +266,8 @@ struct BufferBackedRandomAccessStreamReferenceOpenReadAsync Microsoft::WRL::RuntimeClassFlags, __FIAsyncOperation_1_Windows__CStorage__CStreams__CIRandomAccessStreamWithContentType, ABI::Windows::Foundation::IAsyncInfo> { - InspectableClass(L"WinMLTest.BufferBackedRandomAccessStreamReferenceOpenReadAsync", BaseTrust) public - : Microsoft::WRL::ComPtr ras_; + InspectableClass(L"WinMLTest.BufferBackedRandomAccessStreamReferenceOpenReadAsync", BaseTrust) public + : Microsoft::WRL::ComPtr ras_; Microsoft::WRL::ComPtr> completed_handler_; diff --git a/winml/test/api/raw/weak_buffer.h b/winml/test/api/raw/weak_buffer.h index 488ba0639cc18..74e358f0972f9 100644 --- a/winml/test/api/raw/weak_buffer.h +++ b/winml/test/api/raw/weak_buffer.h @@ -18,9 +18,9 @@ struct WeakBuffer Microsoft::WRL::RuntimeClassFlags, ABI::Windows::Storage::Streams::IBuffer, 
Windows::Storage::Streams::IBufferByteAccess> { - InspectableClass(L"WinMLTest.WeakBuffer", BaseTrust) + InspectableClass(L"WinMLTest.WeakBuffer", BaseTrust) - private : const T* m_p_begin; + private : const T* m_p_begin; const T* m_p_end; public: