diff --git a/.devops/openvino.Dockerfile b/.devops/openvino.Dockerfile
new file mode 100644
index 0000000000000..16924e3937c90
--- /dev/null
+++ b/.devops/openvino.Dockerfile
@@ -0,0 +1,134 @@
+ARG OPENVINO_VERSION_MAJOR=2025.2
+ARG OPENVINO_VERSION_FULL=2025.2.0.19140.c01cd93e24d
+ARG UBUNTU_VERSION=24.04
+
+# Optional proxy build arguments - empty by default
+ARG http_proxy=
+ARG https_proxy=
+
+## Build Image
+FROM ubuntu:${UBUNTU_VERSION} AS build
+
+# Pass proxy args to build stage
+ARG http_proxy
+ARG https_proxy
+
+RUN apt-get update && \
+ apt-get install -y --no-install-recommends \
+ ca-certificates \
+ gnupg \
+ wget \
+ git \
+ cmake \
+ ninja-build \
+ build-essential \
+ libtbb12 \
+ libcurl4-openssl-dev && \
+ rm -rf /var/lib/apt/lists/*
+
+# Install OpenVINO for Ubuntu 24.04
+ARG OPENVINO_VERSION_MAJOR
+ARG OPENVINO_VERSION_FULL
+RUN mkdir -p /opt/intel && \
+ wget https://storage.openvinotoolkit.org/repositories/openvino/packages/${OPENVINO_VERSION_MAJOR}/linux/openvino_toolkit_ubuntu24_${OPENVINO_VERSION_FULL}_x86_64.tgz && \
+    tar -xf openvino_toolkit_ubuntu24_${OPENVINO_VERSION_FULL}_x86_64.tgz && rm openvino_toolkit_ubuntu24_${OPENVINO_VERSION_FULL}_x86_64.tgz && \
+ mv openvino_toolkit_ubuntu24_${OPENVINO_VERSION_FULL}_x86_64 /opt/intel/openvino_${OPENVINO_VERSION_MAJOR} && \
+ cd /opt/intel/openvino_${OPENVINO_VERSION_MAJOR} && \
+ echo "Y" | ./install_dependencies/install_openvino_dependencies.sh && \
+ cd - && \
+ ln -s /opt/intel/openvino_${OPENVINO_VERSION_MAJOR} /opt/intel/openvino
+
+ENV OpenVINO_DIR=/opt/intel/openvino
+
+WORKDIR /app
+
+COPY . .
+
+# Build Stage
+RUN bash -c "source ${OpenVINO_DIR}/setupvars.sh && \
+ cmake -B build/ReleaseOV -G Ninja \
+ -DCMAKE_BUILD_TYPE=Release \
+ -DGGML_OPENVINO=ON && \
+ cmake --build build/ReleaseOV -j$(nproc)"
+
+# Copy all necessary libraries
+RUN mkdir -p /app/lib && \
+ find build/ReleaseOV -name '*.so*' -exec cp {} /app/lib \; && \
+ find ${OpenVINO_DIR}/runtime/lib/intel64 -name '*.so*' -exec cp -P {} /app/lib \; 2>/dev/null || \
+ find ${OpenVINO_DIR}/lib/intel64 -name '*.so*' -exec cp -P {} /app/lib \;
+
+# Create runtime directories and copy binaries
+RUN mkdir -p /app/full \
+ && cp build/ReleaseOV/bin/* /app/full/ \
+ && cp *.py /app/full \
+ && cp -r gguf-py /app/full \
+ && cp -r requirements /app/full \
+ && cp requirements.txt /app/full \
+ && cp .devops/tools.sh /app/full/tools.sh
+
+## Base Runtime Image
+FROM ubuntu:${UBUNTU_VERSION} AS base
+
+# Pass proxy args to runtime stage
+ARG http_proxy
+ARG https_proxy
+
+RUN apt-get update \
+    && apt-get install -y --no-install-recommends libgomp1 libtbb12 curl \
+    && apt-get autoremove -y \
+    && apt-get clean -y \
+ && rm -rf /tmp/* /var/tmp/* \
+ && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
+ && find /var/cache -type f -delete
+
+COPY --from=build /app/lib/ /app/
+
+### Full (all binaries)
+FROM base AS full
+
+ARG http_proxy
+ARG https_proxy
+
+COPY --from=build /app/full /app/
+
+WORKDIR /app
+
+RUN apt-get update && \
+ apt-get install -y --no-install-recommends \
+ git \
+ python3 \
+ python3-venv \
+ python3-pip && \
+ python3 -m venv /ov-venv && \
+ /ov-venv/bin/pip install --no-cache-dir --upgrade pip setuptools wheel && \
+ /ov-venv/bin/pip install --no-cache-dir -r requirements.txt && \
+ apt-get autoremove -y && \
+ apt-get clean && \
+ rm -rf /tmp/* /var/tmp/* && \
+ find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete && \
+ find /var/cache -type f -delete
+
+ENTRYPOINT ["/bin/bash", "-c", "source /ov-venv/bin/activate && exec /app/tools.sh \"$@\"", "--"]
+
+
+### Light, CLI only
+FROM base AS light
+
+COPY --from=build /app/full/llama-cli /app/
+
+WORKDIR /app
+
+ENTRYPOINT [ "/app/llama-cli" ]
+
+### Server, Server only
+FROM base AS server
+
+ENV LLAMA_ARG_HOST=0.0.0.0
+
+COPY --from=build /app/full/llama-server /app/
+
+WORKDIR /app
+
+HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]
+
+ENTRYPOINT [ "/app/llama-server" ]
\ No newline at end of file
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 424b4ba786610..7892591dd2644 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -629,6 +629,45 @@ jobs:
-DGGML_SYCL_F16=ON
cmake --build build --config Release -j $(nproc)
+ ubuntu-24-cmake-openvino:
+ runs-on: ubuntu-24.04
+
+ steps:
+ - name: Clone
+ id: checkout
+ uses: actions/checkout@v4
+
+ - name: ccache
+ uses: hendrikmuhs/ccache-action@v1.2.16
+ with:
+ key: ubuntu-24-cmake-openvino-no-preset-v1
+ evict-old-files: 1d
+
+ - name: Dependencies
+ id: depends
+ run: |
+ export OPENVINO_VERSION_MAJOR=2025.2
+ export OPENVINO_VERSION_FULL=2025.2.0.19140.c01cd93e24d
+ sudo apt-get update
+ sudo apt-get install -y build-essential libcurl4-openssl-dev libtbb12 cmake ninja-build python3-pip curl wget tar
+ sudo mkdir -p /opt/intel
+ wget -O openvino_${OPENVINO_VERSION_MAJOR}.tgz https://storage.openvinotoolkit.org/repositories/openvino/packages/${OPENVINO_VERSION_MAJOR}/linux/openvino_toolkit_ubuntu24_${OPENVINO_VERSION_FULL}_x86_64.tgz
+ tar -xf openvino_${OPENVINO_VERSION_MAJOR}.tgz
+ sudo mv openvino_toolkit_ubuntu24_${OPENVINO_VERSION_FULL}_x86_64 /opt/intel/openvino_${OPENVINO_VERSION_MAJOR}
+ rm openvino_${OPENVINO_VERSION_MAJOR}.tgz
+ cd /opt/intel/openvino_${OPENVINO_VERSION_MAJOR}
+ echo "Y" | sudo -E ./install_dependencies/install_openvino_dependencies.sh && cd -
+ sudo ln -s /opt/intel/openvino_${OPENVINO_VERSION_MAJOR} /opt/intel/openvino
+
+ - name: Build
+ id: cmake_build
+ run: |
+ source /opt/intel/openvino/setupvars.sh
+ cmake -B build/ReleaseOV -G Ninja \
+ -DCMAKE_BUILD_TYPE=Release \
+ -DGGML_OPENVINO=ON
+ cmake --build build/ReleaseOV --config Release -j $(nproc)
+
build-linux-cross:
uses: ./.github/workflows/build-linux-cross.yml
diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml
index bf2c8509ec14e..410562812671b 100644
--- a/.github/workflows/docker.yml
+++ b/.github/workflows/docker.yml
@@ -45,6 +45,7 @@ jobs:
- { tag: "intel", dockerfile: ".devops/intel.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: true }
- { tag: "vulkan", dockerfile: ".devops/vulkan.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: false }
- { tag: "s390x", dockerfile: ".devops/s390x.Dockerfile", platforms: "linux/s390x", full: true, light: true, server: true, free_disk_space: false }
+ - { tag: "openvino", dockerfile: ".devops/openvino.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: false }
# Note: the rocm images are failing due to a compiler error and are disabled until this is fixed to allow the workflow to complete
#- {tag: "rocm", dockerfile: ".devops/rocm.Dockerfile", platforms: "linux/amd64,linux/arm64", full: true, light: true, server: true, free_disk_space: true }
steps:
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index f461456edf008..93d8e5e6d8dba 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -241,6 +241,63 @@ jobs:
path: llama-${{ steps.tag.outputs.name }}-bin-ubuntu-vulkan-x64.zip
name: llama-bin-ubuntu-vulkan-x64.zip
+ ubuntu-24-openvino:
+ runs-on: ubuntu-24.04
+
+ steps:
+ - name: Clone
+ id: checkout
+ uses: actions/checkout@v4
+ with:
+ fetch-depth: 0
+
+ - name: ccache
+ uses: hendrikmuhs/ccache-action@v1.2.16
+ with:
+ key: ubuntu-24-cmake-openvino-release-no-preset-v1
+ evict-old-files: 1d
+
+ - name: Dependencies
+ id: depends
+ run: |
+ export OPENVINO_VERSION_MAJOR=2025.2
+ export OPENVINO_VERSION_FULL=2025.2.0.19140.c01cd93e24d
+ sudo apt-get update
+ sudo apt-get install -y build-essential libcurl4-openssl-dev libtbb12 cmake ninja-build python3-pip curl wget tar
+ sudo mkdir -p /opt/intel
+ wget -O openvino_${OPENVINO_VERSION_MAJOR}.tgz https://storage.openvinotoolkit.org/repositories/openvino/packages/${OPENVINO_VERSION_MAJOR}/linux/openvino_toolkit_ubuntu24_${OPENVINO_VERSION_FULL}_x86_64.tgz
+ tar -xf openvino_${OPENVINO_VERSION_MAJOR}.tgz
+ sudo mv openvino_toolkit_ubuntu24_${OPENVINO_VERSION_FULL}_x86_64 /opt/intel/openvino_${OPENVINO_VERSION_MAJOR}
+ rm openvino_${OPENVINO_VERSION_MAJOR}.tgz
+ cd /opt/intel/openvino_${OPENVINO_VERSION_MAJOR}
+ echo "Y" | sudo -E ./install_dependencies/install_openvino_dependencies.sh && cd -
+ sudo ln -s /opt/intel/openvino_${OPENVINO_VERSION_MAJOR} /opt/intel/openvino
+
+ - name: Build
+ id: cmake_build
+ run: |
+ source /opt/intel/openvino/setupvars.sh
+ cmake -B build/ReleaseOV -G Ninja \
+ -DCMAKE_BUILD_TYPE=Release \
+ -DGGML_OPENVINO=ON
+ cmake --build build/ReleaseOV --config Release -j $(nproc)
+
+ - name: Determine tag name
+ id: tag
+ uses: ./.github/actions/get-tag-name
+
+ - name: Pack artifacts
+ id: pack_artifacts
+ run: |
+ cp LICENSE ./build/ReleaseOV/bin/
+ zip -r llama-${{ steps.tag.outputs.name }}-bin-ubuntu-openvino-x64.zip ./build/ReleaseOV/bin/*
+
+ - name: Upload artifacts
+ uses: actions/upload-artifact@v4
+ with:
+ path: llama-${{ steps.tag.outputs.name }}-bin-ubuntu-openvino-x64.zip
+ name: llama-bin-ubuntu-openvino-x64.zip
+
windows-cpu:
runs-on: windows-2025
diff --git a/IR.xml b/IR.xml
new file mode 100644
index 0000000000000..f5b1df8740a66
--- /dev/null
+++ b/IR.xml
@@ -0,0 +1,462 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 2
+ 128
+ 64
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1
+ 1
+ 32
+
+
+ 1
+ 1
+ 32
+
+
+
+
+
+
+
+
+ 1
+ 1
+ 2
+
+
+
+
+
+
+
+
+
+
+
+ 1
+ 1
+ 2
+
+
+ 3
+
+
+
+
+
+
+
+
+ 1
+ 1
+ 32
+
+
+ 2
+ 1
+ 1
+
+
+
+
+
+
+
+
+
+
+
+
+ 2
+ 1
+ 32
+
+
+ 1
+
+
+
+
+
+
+
+ 2
+ 1
+ 32
+
+
+
+
+
+
+
+
+
+
+
+
+ 2
+ 1
+ 32
+
+
+
+
+
+
+
+
+
+ 2
+ 128
+ 32
+
+
+ 2
+ 1
+ 32
+
+
+
+
+
+
+
+ 2
+ 1
+ 32
+
+
+
+
+
+
+
+
+ 2
+ 1
+ 32
+
+
+
+
+
+
+
+
+
+ 2
+ 128
+ 32
+
+
+ 2
+ 1
+ 32
+
+
+
+
+
+
+
+
+ 2
+ 128
+ 32
+
+
+ 2
+ 128
+ 32
+
+
+
+
+
+
+
+
+ 2
+ 128
+ 32
+
+
+ 2
+ 1
+ 32
+
+
+
+
+
+
+
+
+ 2
+ 128
+ 32
+
+
+ 2
+ 1
+ 32
+
+
+
+
+
+
+
+
+ 2
+ 128
+ 32
+
+
+ 2
+ 128
+ 32
+
+
+
+
+
+
+
+
+ 2
+ 128
+ 32
+
+
+ 2
+ 128
+ 32
+
+
+
+
+
+
+
+ 2
+ 128
+ 64
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/ci/run.sh b/ci/run.sh
index 68cbfdf2f52aa..d7b7c27ee4d84 100755
--- a/ci/run.sh
+++ b/ci/run.sh
@@ -22,6 +22,9 @@
# # with MUSA support
# GG_BUILD_MUSA=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
#
+# # with OPENVINO support
+# GG_BUILD_OPENVINO=1 GG_BUILD_LOW_PERF=1 GGML_OPENVINO_DEVICE=CPU bash ./ci/run.sh ./tmp/results ./tmp/mnt
+#
if [ -z "$2" ]; then
echo "usage: $0 <output-dir> <mnt-dir>"
@@ -114,6 +117,15 @@ if [ ! -z ${GG_BUILD_NO_SVE} ]; then
# arm 9 and newer enables sve by default, adjust these flags depending on the cpu used
CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_NATIVE=OFF -DGGML_CPU_ARM_ARCH=armv8.5-a+fp16+i8mm"
fi
+
+if [ ! -z ${GG_BUILD_OPENVINO} ]; then
+ if [ -z ${OpenVINO_DIR} ]; then
+ echo "OpenVINO_DIR not found, please install OpenVINO via archives and enable it by:"
+ echo "source /opt/intel/openvino/setupvars.sh"
+ exit 1
+ fi
+ CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_OPENVINO=ON -DGGML_CPU_REPACK=OFF"
+fi
## helpers
# download a file if it does not exist or if it is outdated
diff --git a/docs/build.md b/docs/build.md
index dcbcce7549ad2..e2ef8b4e08b5b 100644
--- a/docs/build.md
+++ b/docs/build.md
@@ -13,6 +13,21 @@ cd llama.cpp
The following sections describe how to build with different backends and options.
+* [CPU Build](#cpu-build)
+* [BLAS Build](#blas-build)
+* [Metal Build](#metal-build)
+* [SYCL](#sycl)
+* [CUDA](#cuda)
+* [MUSA](#musa)
+* [HIP](#hip)
+* [Vulkan](#vulkan)
+* [CANN](#cann)
+* [Arm® KleidiAI™](#arm-kleidiai)
+* [OpenCL](#opencl)
+* [Android](#android-1)
+* [OpenVINO](#openvino)
+* [Notes about GPU-accelerated backends](#notes-about-gpu-accelerated-backends)
+
## CPU Build
Build llama.cpp using `CMake`:
@@ -575,6 +590,127 @@ Follow the instructions [here](https://dawn.googlesource.com/dawn/+/refs/heads/m
To read documentation for how to build on IBM Z & LinuxONE, [click here](./build-s390x.md)
+## OpenVINO
+
+[OpenVINO](https://docs.openvino.ai/2025/index.html) is an open-source toolkit for optimizing and deploying high-performance AI inference, specifically designed for Intel hardware, including CPUs, GPUs, and NPUs, in the cloud, on-premises, and on the edge.
+The OpenVINO backend enhances performance by leveraging hardware-specific optimizations and can be enabled for use with llama.cpp.
+
+Follow the instructions below to install OpenVINO runtime and build llama.cpp with OpenVINO support.
+
+### Prerequisites
+
+- Linux or Windows system with Intel hardware (CPU, GPU, or NPU)
+- **For Intel GPU or NPU Usage**: Install the appropriate hardware drivers for your Intel GPU or NPU. For detailed instructions, see: [Additional Configurations for Hardware Acceleration](https://docs.openvino.ai/2025/get-started/install-openvino/configurations.html).
+- Git, CMake, and Ninja software tools are needed for building.
+```bash
+ sudo apt-get update
+ sudo apt-get install -y build-essential libcurl4-openssl-dev libtbb12 cmake ninja-build python3-pip curl wget tar
+```
+
+### 1. Install OpenVINO Runtime
+
+- Follow the guide to install OpenVINO Runtime from an archive file: [Linux](https://docs.openvino.ai/2025/get-started/install-openvino/install-openvino-archive-linux.html) | [Windows](https://docs.openvino.ai/2025/get-started/install-openvino/install-openvino-archive-windows.html)
+
+
+📦 Click to expand OpenVINO 2025.2 installation commands on Linux
+
+
+```bash
+export OPENVINO_VERSION_MAJOR=2025.2
+export OPENVINO_VERSION_FULL=2025.2.0.19140.c01cd93e24d
+sudo apt-get update
+sudo apt-get install -y build-essential libcurl4-openssl-dev libtbb12 cmake ninja-build python3-pip curl wget tar
+sudo mkdir -p /opt/intel
+wget -O openvino_${OPENVINO_VERSION_MAJOR}.tgz https://storage.openvinotoolkit.org/repositories/openvino/packages/${OPENVINO_VERSION_MAJOR}/linux/openvino_toolkit_ubuntu24_${OPENVINO_VERSION_FULL}_x86_64.tgz
+tar -xf openvino_${OPENVINO_VERSION_MAJOR}.tgz
+sudo mv openvino_toolkit_ubuntu24_${OPENVINO_VERSION_FULL}_x86_64 /opt/intel/openvino_${OPENVINO_VERSION_MAJOR}
+rm openvino_${OPENVINO_VERSION_MAJOR}.tgz
+cd /opt/intel/openvino_${OPENVINO_VERSION_MAJOR}
+echo "Y" | sudo -E ./install_dependencies/install_openvino_dependencies.sh && cd -
+sudo ln -s /opt/intel/openvino_${OPENVINO_VERSION_MAJOR} /opt/intel/openvino
+source /opt/intel/openvino/setupvars.sh
+```
+
+
+- Verify OpenVINO is initialized properly
+```bash
+echo $OpenVINO_DIR
+```
+
+### 2. Build llama.cpp with OpenVINO Backend
+
+Clone llama.cpp and build it with the OpenVINO backend enabled:
+
+```bash
+# Clone the llama.cpp repository
+git clone https://github.com/ggml-org/llama.cpp.git
+cd llama.cpp
+
+# Build with OpenVINO support
+source /opt/intel/openvino/setupvars.sh
+cmake -B build/ReleaseOV -G Ninja -DCMAKE_BUILD_TYPE=Release -DGGML_OPENVINO=ON -DGGML_CPU_REPACK=OFF
+cmake --build build/ReleaseOV --config Release -j $(nproc)
+```
+
+### 3. Download Sample Model
+
+Download models for testing:
+
+```bash
+# Create models directory
+mkdir -p ~/models/
+
+# Download model file: Llama-3.2-1B-Instruct.fp16.gguf
+wget https://huggingface.co/MaziyarPanahi/Llama-3.2-1B-Instruct-GGUF/resolve/main/Llama-3.2-1B-Instruct.fp16.gguf \
+ -O ~/models/Llama-3.2-1B-Instruct.fp16.gguf
+
+# Download model file: Phi-3-mini-4k-instruct-fp16.gguf
+wget https://huggingface.co/microsoft/Phi-3-mini-4k-instruct-gguf/resolve/main/Phi-3-mini-4k-instruct-fp16.gguf \
+ -O ~/models/Phi-3-mini-4k-instruct-fp16.gguf
+```
+
+### 4. Run inference with OpenVINO backend:
+
+When using the OpenVINO backend, the first inference token may have slightly higher latency due to on-the-fly conversion to the OpenVINO graph. Subsequent tokens and runs will be faster.
+
+```bash
+export GGML_OPENVINO_CACHE_DIR=/tmp/ov_cache
+# GGML_OPENVINO_DEVICE selects the target device. If it is not set, the first
+# available device is chosen in priority order: GPU, CPU, NPU.
+export GGML_OPENVINO_DEVICE=GPU
+
+./build/ReleaseOV/bin/llama-simple -m ~/models/Llama-3.2-1B-Instruct.fp16.gguf -n 50 "The story of AI is "
+
+```
+
+To run in chat mode:
+```bash
+export GGML_OPENVINO_CACHE_DIR=/tmp/ov_cache
+./build/ReleaseOV/bin/llama-cli -m ~/models/Llama-3.2-1B-Instruct.fp16.gguf -n 50 "The story of AI is "
+
+```
+
+### Configuration Options
+
+Control OpenVINO behavior using these environment variables:
+
+- **`GGML_OPENVINO_DEVICE`**: Specify the target device for OpenVINO inference. If not set, automatically selects the first available device in priority order: GPU, CPU, NPU. When set to `NPU` to use Intel NPUs, it enables static compilation mode for optimal performance.
+- **`GGML_OPENVINO_CACHE_DIR`**: Directory for model caching (recommended: `/tmp/ov_cache`). If set, enables model caching in OpenVINO. Note: Not supported when using NPU devices yet.
+- **`GGML_OPENVINO_PROFILING`**: Enable execution time profiling.
+- **`GGML_OPENVINO_DUMP_CGRAPH`**: Save compute graph to `cgraph.txt`.
+- **`GGML_OPENVINO_DUMP_IR`**: Export OpenVINO IR files with timestamps.
+- **`GGML_OPENVINO_DEBUG_INPUT`**: Enable input debugging.
+- **`GGML_OPENVINO_DEBUG_OUTPUT`**: Enable output debugging.
+
+### Example with Profiling
+
+```bash
+export GGML_OPENVINO_CACHE_DIR=/tmp/ov_cache
+export GGML_OPENVINO_PROFILING=1
+
+./build/ReleaseOV/bin/llama-simple -m ~/models/Llama-3.2-1B-Instruct.fp16.gguf -n 50 "The story of AI is "
+```
+
## Notes about GPU-accelerated backends
The GPU may still be used to accelerate some parts of the computation even when using the `-ngl 0` option. You can fully disable GPU acceleration by using `--device none`.
diff --git a/ggml/CMakeLists.txt b/ggml/CMakeLists.txt
index 1a0fdb676c449..5c29df642cfb0 100644
--- a/ggml/CMakeLists.txt
+++ b/ggml/CMakeLists.txt
@@ -243,6 +243,8 @@ set (GGML_SYCL_TARGET "INTEL" CACHE STRING
set (GGML_SYCL_DEVICE_ARCH "" CACHE STRING
"ggml: sycl device architecture")
+option(GGML_OPENVINO "ggml: use OPENVINO" OFF)
+
option(GGML_OPENCL "ggml: use OpenCL" OFF)
option(GGML_OPENCL_PROFILING "ggml: use OpenCL profiling (increases overhead)" OFF)
option(GGML_OPENCL_EMBED_KERNELS "ggml: embed kernels" ON)
@@ -314,6 +316,7 @@ set(GGML_PUBLIC_HEADERS
include/ggml-sycl.h
include/ggml-vulkan.h
include/ggml-webgpu.h
+ include/ggml-openvino.h
include/gguf.h)
set_target_properties(ggml PROPERTIES PUBLIC_HEADER "${GGML_PUBLIC_HEADERS}")
diff --git a/ggml/include/ggml-openvino.h b/ggml/include/ggml-openvino.h
new file mode 100644
index 0000000000000..151c48d40d067
--- /dev/null
+++ b/ggml/include/ggml-openvino.h
@@ -0,0 +1,63 @@
+#pragma once
+
+#include "ggml.h"
+#include "ggml-backend.h"
+
+#include <array>
+#include <cstddef>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define GGML_OPENVINO_NAME "OPENVINO"
+#define GGML_OPENVINO_MAX_DEVICES 16
+
+// backend API
+GGML_BACKEND_API ggml_backend_t ggml_backend_openvino_init(int device);
+
+GGML_BACKEND_API bool ggml_backend_is_openvino(ggml_backend_t backend);
+
+// device buffer
+GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_openvino_buffer_type(int device);
+
+// split tensor buffer that splits matrices by rows across multiple devices
+GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_openvino_split_buffer_type(const float * tensor_split);
+
+// pinned host buffer for use with the CPU backend for faster copies between CPU
+// and GPU
+GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_openvino_host_buffer_type(void);
+
+GGML_BACKEND_API int ggml_backend_openvino_get_device_count(void);
+// GGML_BACKEND_API void ggml_backend_openvino_get_device_description(int device, char * description,
+// size_t description_size);
+// GGML_BACKEND_API void ggml_backend_openvino_get_device_memory(int device, size_t * free, size_t * total);
+
+// GGML_BACKEND_API bool ggml_backend_openvino_register_host_buffer(void * buffer, size_t size);
+// GGML_BACKEND_API void ggml_backend_openvino_unregister_host_buffer(void * buffer);
+
+GGML_BACKEND_API ggml_backend_reg_t ggml_backend_openvino_reg(void);
+
+struct ggml_openvino_device_info {
+ int device_count;
+
+ struct openvino_device_info {
+ int cc; // compute capability
+ int nsm; // number of streaming multiprocessors
+ size_t smpb; // max. shared memory per block
+ size_t smpbo; // max. shared memory per block (with opt-in)
+ bool vmm; // virtual memory support
+ size_t vmm_granularity; // granularity of virtual memory
+ size_t total_vram;
+ };
+
+ openvino_device_info devices[GGML_OPENVINO_MAX_DEVICES] = {};
+
+    std::array<float, GGML_OPENVINO_MAX_DEVICES> default_tensor_split = {};
+};
+
+const ggml_openvino_device_info & ggml_openvino_info();
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/ggml/src/CMakeLists.txt b/ggml/src/CMakeLists.txt
index c8f3d8596427c..ca9e24313e996 100644
--- a/ggml/src/CMakeLists.txt
+++ b/ggml/src/CMakeLists.txt
@@ -387,6 +387,7 @@ ggml_add_backend(Vulkan)
ggml_add_backend(WebGPU)
ggml_add_backend(zDNN)
ggml_add_backend(OpenCL)
+ggml_add_backend(OpenVINO)
foreach (target ggml-base ggml)
target_include_directories(${target} PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../include> $<INSTALL_INTERFACE:include>)
diff --git a/ggml/src/ggml-backend-reg.cpp b/ggml/src/ggml-backend-reg.cpp
index 7002cb07e0015..3d048cac3ffe5 100644
--- a/ggml/src/ggml-backend-reg.cpp
+++ b/ggml/src/ggml-backend-reg.cpp
@@ -69,6 +69,10 @@
#include "ggml-cann.h"
#endif
+#ifdef GGML_USE_OPENVINO
+#include "ggml-openvino.h"
+#endif
+
// disable C++17 deprecation warning for std::codecvt_utf8
#if defined(__clang__)
# pragma clang diagnostic push
@@ -199,6 +203,9 @@ struct ggml_backend_registry {
#ifdef GGML_USE_RPC
register_backend(ggml_backend_rpc_reg());
#endif
+#ifdef GGML_USE_OPENVINO
+ register_backend(ggml_backend_openvino_reg());
+#endif
#ifdef GGML_USE_CPU
register_backend(ggml_backend_cpu_reg());
#endif
@@ -590,6 +597,7 @@ void ggml_backend_load_all_from_path(const char * dir_path) {
ggml_backend_load_best("vulkan", silent, dir_path);
ggml_backend_load_best("opencl", silent, dir_path);
ggml_backend_load_best("musa", silent, dir_path);
+ ggml_backend_load_best("openvino", silent, dir_path);
ggml_backend_load_best("cpu", silent, dir_path);
// check the environment variable GGML_BACKEND_PATH to load an out-of-tree backend
const char * backend_path = std::getenv("GGML_BACKEND_PATH");
diff --git a/ggml/src/ggml-openvino/.clang-format b/ggml/src/ggml-openvino/.clang-format
new file mode 100644
index 0000000000000..63dc2c472a95d
--- /dev/null
+++ b/ggml/src/ggml-openvino/.clang-format
@@ -0,0 +1,143 @@
+---
+# Override root .clang-format
+AlignConsecutiveAssignments: false
+AlignConsecutiveDeclarations: false
+ReferenceAlignment: Left
+PointerAlignment: Left
+Cpp11BracedListStyle: true
+AccessModifierOffset: -4
+BinPackArguments: false
+BreakBeforeBraces: Attach
+IndentCaseBlocks: false
+IndentCaseLabels: false
+
+Language: Cpp
+AlignAfterOpenBracket: Align
+AlignArrayOfStructures: Left
+AlignConsecutiveBitFields: AcrossComments
+AlignConsecutiveMacros: AcrossComments
+# AlignConsecutiveShortCaseStatements: AcrossComments
+AlignEscapedNewlines: Left # LeftWithLastLine
+AlignOperands: Align
+AlignTrailingComments:
+ Kind: Always
+ OverEmptyLines: 1
+AllowAllArgumentsOnNextLine: true
+AllowAllParametersOfDeclarationOnNextLine: false
+# AllowBreakBeforeNoexceptSpecifier: OnlyWithParen
+AllowShortBlocksOnASingleLine: Never
+AllowShortCaseLabelsOnASingleLine: false
+AllowShortFunctionsOnASingleLine: Inline
+AllowShortIfStatementsOnASingleLine: Never
+AllowShortLambdasOnASingleLine: Inline
+AllowShortLoopsOnASingleLine: false
+AlwaysBreakBeforeMultilineStrings: true
+BinPackParameters: true
+BitFieldColonSpacing: Both
+# BreakAdjacentStringLiterals: true
+BreakAfterAttributes: Never
+BreakBeforeBinaryOperators: None
+BreakBeforeInlineASMColon: OnlyMultiline
+BreakBeforeTernaryOperators: false
+# BreakBinaryOperations: Never
+BreakConstructorInitializers: AfterColon
+# BreakFunctionDefinitionParameters: false
+BreakInheritanceList: AfterComma
+BreakStringLiterals: true
+# BreakTemplateDeclarations: Yes
+ColumnLimit: 120
+CommentPragmas: '^ IWYU pragma:'
+CompactNamespaces: false
+ConstructorInitializerIndentWidth: 4
+ContinuationIndentWidth: 4
+DerivePointerAlignment: false
+DisableFormat: false
+EmptyLineBeforeAccessModifier: Leave
+EmptyLineAfterAccessModifier: Never
+ExperimentalAutoDetectBinPacking: false
+FixNamespaceComments: true
+IncludeBlocks: Regroup
+IncludeCategories:
+ - Regex: '^<.*\.h>'
+ Priority: 1
+ SortPriority: 0
+ - Regex: '^<.*'
+ Priority: 2
+ SortPriority: 0
+ - Regex: '.*'
+ Priority: 3
+ SortPriority: 0
+IncludeIsMainRegex: '([-_](test|unittest))?$'
+IncludeIsMainSourceRegex: ''
+IndentAccessModifiers: false
+IndentExternBlock: NoIndent
+IndentGotoLabels: false
+IndentPPDirectives: AfterHash
+IndentWidth: 4
+IndentWrappedFunctionNames: false
+InsertBraces: true # NOTE: may lead to incorrect formatting
+InsertNewlineAtEOF: true
+JavaScriptQuotes: Leave
+JavaScriptWrapImports: true
+KeepEmptyLinesAtTheStartOfBlocks: false
+LambdaBodyIndentation: Signature
+LineEnding: LF
+MacroBlockBegin: ''
+MacroBlockEnd: ''
+MaxEmptyLinesToKeep: 1
+NamespaceIndentation: None
+ObjCBinPackProtocolList: Auto
+ObjCBlockIndentWidth: 4
+ObjCSpaceAfterProperty: true
+ObjCSpaceBeforeProtocolList: true
+PPIndentWidth: -1
+PackConstructorInitializers: CurrentLine
+PenaltyBreakAssignment: 2
+PenaltyBreakBeforeFirstCallParameter: 1
+PenaltyBreakComment: 300
+PenaltyBreakFirstLessLess: 120
+PenaltyBreakString: 1000
+PenaltyBreakTemplateDeclaration: 10
+PenaltyExcessCharacter: 1000000
+PenaltyReturnTypeOnItsOwnLine: 200
+QualifierAlignment: Left
+#QualifierOrder: ['static', 'inline', 'friend', 'constexpr', 'const', 'volatile', 'type', 'restrict']
+RawStringFormats:
+ - Language: Cpp
+ Delimiters:
+ - cc
+ - CC
+ - cpp
+ - Cpp
+ - CPP
+ - 'c++'
+ - 'C++'
+ CanonicalDelimiter: ''
+ReflowComments: false # IndentOnly
+SeparateDefinitionBlocks: Always
+SortIncludes: CaseInsensitive
+SortUsingDeclarations: LexicographicNumeric
+SpaceAfterCStyleCast: true
+SpaceAfterLogicalNot: false
+SpaceAfterTemplateKeyword: true
+SpaceBeforeAssignmentOperators: true
+SpaceBeforeCpp11BracedList: false
+SpaceBeforeCtorInitializerColon: true
+SpaceBeforeInheritanceColon: true
+SpaceBeforeParens: ControlStatements
+SpaceBeforeRangeBasedForLoopColon: true
+SpaceInEmptyBlock: false
+SpaceInEmptyParentheses: false
+SpacesBeforeTrailingComments: 2
+SpacesInAngles: Never
+SpacesInLineCommentPrefix:
+ Minimum: 1
+ Maximum: -1
+SpacesInParentheses: false
+SpacesInSquareBrackets: false
+SpaceBeforeSquareBrackets: false
+Standard: c++17
+TabWidth: 4
+UseTab: Never
+WhitespaceSensitiveMacros: ['STRINGIZE']
+...
diff --git a/ggml/src/ggml-openvino/CMakeLists.txt b/ggml/src/ggml-openvino/CMakeLists.txt
new file mode 100644
index 0000000000000..216aa756a7a96
--- /dev/null
+++ b/ggml/src/ggml-openvino/CMakeLists.txt
@@ -0,0 +1,19 @@
+find_package(OpenVINO REQUIRED)
+
+file(GLOB_RECURSE GGML_HEADERS_OPENVINO "*.h" "*.hpp")
+file(GLOB_RECURSE GGML_SOURCES_OPENVINO "*.cpp")
+
+ggml_add_backend_library(ggml-openvino
+ ${GGML_SOURCES_OPENVINO}
+ ${GGML_HEADERS_OPENVINO}
+)
+
+target_link_libraries(ggml-openvino PRIVATE openvino::runtime)
+
+if (GGML_OPENVINO)
+ if (CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64")
+ elseif (CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" OR CMAKE_SYSTEM_PROCESSOR STREQUAL "amd64" OR CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64")
+ else()
+ message(FATAL_ERROR "OpenVINO: OpenVINO toolkit supports x86-64 and arm64 but not ${CMAKE_SYSTEM_PROCESSOR}")
+ endif()
+endif()
diff --git a/ggml/src/ggml-openvino/ggml-decoder.cpp b/ggml/src/ggml-openvino/ggml-decoder.cpp
new file mode 100644
index 0000000000000..751fa192a4261
--- /dev/null
+++ b/ggml/src/ggml-openvino/ggml-decoder.cpp
@@ -0,0 +1,818 @@
+#include "ggml-decoder.h"
+
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include