microsoft · edgchen1 · Apr 10, 2026 · Apr 10, 2026 · Apr 10, 2026 · Apr 10, 2026
diff --git a/include/onnxruntime/ep/adapter/op_kernel_info.h b/include/onnxruntime/ep/adapter/op_kernel_info.h
@@ -12,9 +12,9 @@
 #include "core/common/narrow.h"
 #include "core/common/status.h"
 #include "core/framework/config_options.h"
-#include "core/framework/op_kernel_info.h"
 #include "core/framework/tensor_shape.h"
 #include "core/framework/tensor.h"
+#include "core/session/allocator_adapters.h"
 
 #include "node.h"
 #include "kernel_def.h"
@@ -43,12 +43,10 @@ struct OpKernelInfo {
   // to manage the lifetime of the cached data.
   struct KernelInfoCache {
     explicit KernelInfoCache(const OrtKernelInfo* kernel_info) : kernel_info_(kernel_info) {
-      const auto* core_kernel_info = reinterpret_cast<const ::onnxruntime::OpKernelInfo*>(kernel_info);
-      execution_provider_ = core_kernel_info->GetExecutionProvider();
-      ort_ep_ = execution_provider_ != nullptr ? execution_provider_->GetOrtEp() : nullptr;
-      ep_impl_ = ort_ep_ != nullptr ? (static_cast<const Ep*>(ort_ep_))->EpImpl() : execution_provider_;
-
       Ort::ConstKernelInfo info{kernel_info};
+      ort_ep_ = info.GetEp();
+      ep_impl_ = ort_ep_ != nullptr ? (static_cast<const Ep*>(ort_ep_))->EpImpl() : nullptr;
+
       const size_t input_count = info.GetInputCount();
       constant_input_tensors.resize(input_count);
       for (size_t i = 0; i < input_count; ++i) {
@@ -60,7 +58,6 @@ struct OpKernelInfo {
       }
     }
     const OrtKernelInfo* kernel_info_;
-    const ::onnxruntime::IExecutionProvider* execution_provider_{};
     const OrtEp* ort_ep_{};
     const ::onnxruntime::IExecutionProvider* ep_impl_{};
     std::vector<Tensor> constant_input_tensors;
@@ -74,11 +71,10 @@ struct OpKernelInfo {
     return (static_cast<const Ep*>(cache_->ort_ep_))->GetDataTransferManager();
   }
 
-  // Delegates to the core OpKernelInfo::GetAllocator so the adapter returns
-  // exactly the same allocator the framework would provide for each OrtMemType.
   AllocatorPtr GetAllocator(OrtMemType mem_type) const {
-    const auto* core_kernel_info = reinterpret_cast<const ::onnxruntime::OpKernelInfo*>(cache_->kernel_info_);
-    return core_kernel_info->GetAllocator(mem_type);
+    OrtAllocator* ort_allocator = nullptr;
+    Ort::ThrowOnError(Ort::GetApi().KernelInfoGetAllocator(cache_->kernel_info_, mem_type, &ort_allocator));
+    return std::make_shared<IAllocatorImplWrappingOrtAllocator>(ort_allocator);
   }
 
   Node node() const noexcept {

diff --git a/plugin-ep-webgpu/README.md b/plugin-ep-webgpu/README.md
@@ -0,0 +1,47 @@
+# WebGPU Plugin Execution Provider
+
+Packaging sources for the ONNX Runtime WebGPU plugin Execution Provider (EP), distributed as a standalone artifact
+that plugs into an existing ONNX Runtime installation rather than being built into the main `onnxruntime` binary.
+
+For more information about plugin EPs, see the [plugin EP libraries documentation](https://onnxruntime.ai/docs/execution-providers/plugin-ep-libraries/).
+
+## Contents
+
+- [`VERSION_NUMBER`](VERSION_NUMBER) — Base plugin EP version consumed by the CI pipeline. The pipeline derives the
+  final package version (release, dev) from this via
+  [`tools/ci_build/github/azure-pipelines/templates/set-plugin-build-variables-step.yml`](../tools/ci_build/github/azure-pipelines/templates/set-plugin-build-variables-step.yml).
+- [`python/`](python/) — Sources and build script for the `onnxruntime-ep-webgpu` Python wheel. See
+  [`python/README.md`](python/README.md) for build and test instructions.
+
+## How it fits together
+
+The plugin EP is built as a shared library (`onnxruntime_providers_webgpu.{dll,so,dylib}`) by the main ONNX Runtime
+build (`--use_webgpu shared_lib`). The resulting binaries are then packaged into:
+
+- A Python wheel (`onnxruntime-ep-webgpu`), built from [`python/`](python/).
+- A universal package published to the internal ORT-Nightly feed for Windows (x64 / arm64), Linux x64, and macOS
+  arm64.
+
+Packaging is driven by the `WebGPU Plugin EP Packaging Pipeline`
+([`tools/ci_build/github/azure-pipelines/plugin-webgpu-pipeline.yml`](../tools/ci_build/github/azure-pipelines/plugin-webgpu-pipeline.yml)),
+and post-build smoke tests run in the companion `WebGPU Plugin EP Test Pipeline`
+([`tools/ci_build/github/azure-pipelines/plugin-webgpu-test-pipeline.yml`](../tools/ci_build/github/azure-pipelines/plugin-webgpu-test-pipeline.yml)).
+
+## Usage
+
+Once installed, the plugin EP is registered at runtime:
+
+```python
+import onnxruntime as ort
+import onnxruntime_ep_webgpu as webgpu_ep
+
+ort.register_execution_provider_library("webgpu", webgpu_ep.get_library_path())
+
+devices = [d for d in ort.get_ep_devices() if d.ep_name == webgpu_ep.get_ep_name()]
+sess_options = ort.SessionOptions()
+sess_options.add_provider_for_devices(devices, {})
+session = ort.InferenceSession("model.onnx", sess_options=sess_options)
+```
+
+See [`python/onnxruntime_ep_webgpu/README.md`](python/onnxruntime_ep_webgpu/README.md) for the user-facing package
+documentation (that README, not this one, is bundled into the wheel).
diff --git a/plugin-ep-webgpu/VERSION_NUMBER b/plugin-ep-webgpu/VERSION_NUMBER
@@ -0,0 +1 @@
+1.26.0
diff --git a/plugin-ep-webgpu/python/README.md b/plugin-ep-webgpu/python/README.md
@@ -0,0 +1,58 @@
+# WebGPU Plugin EP Python Package
+
+This directory contains the packaging source for the `onnxruntime-ep-webgpu` Python package.
+
+## Prerequisites
+
+- Python 3.11+
+- Pre-built WebGPU plugin EP binaries (from CI or a local build)
+
+Install build dependencies:
+
+```bash
+pip install -r requirements-build-wheel.txt
+```
+
+## Building the wheel
+
+```bash
+python build_wheel.py \
+  --binary_dir <path-to-built-binaries> \
+  --version <PEP-440-version> \
+  --output_dir <output-directory>
+```
+
+Example:
+
+```bash
+python build_wheel.py \
+  --binary_dir ./build/Release \
+  --version 1.26.0.dev20260410 \
+  --output_dir ./dist
+```
+
+The script combines the pre-built plugin EP binaries with the package source to produce a platform-specific wheel.
+
+## Testing
+
+Install the wheel and dependencies in a clean environment, then run the smoke test:
+
+```bash
+python -m venv test_venv
+source test_venv/bin/activate  # or test_venv\Scripts\Activate.ps1 on Windows
+pip install onnx numpy
+pip install dist/onnxruntime_ep_webgpu-*.whl  # pulls in onnxruntime>=1.24.4
+python test/test_webgpu_plugin_ep.py
+```
+
+The wheel declares a runtime dependency on the minimum compatible `onnxruntime` package, so pip will install (or
+verify) a compatible core runtime automatically.
+
+The test validates import, EP registration, device discovery, and inference (requires WebGPU-capable hardware for the
+inference portion). Set the environment variable `ORT_TEST_VERBOSE=1` to print additional diagnostic information
+(environment, available providers, discovered devices, etc.).
+
+## Versioning
+
+The package version is derived from `plugin-ep-webgpu/VERSION_NUMBER` by the packaging pipeline, which produces a
+PEP 440 version string.
diff --git a/plugin-ep-webgpu/python/build_wheel.py b/plugin-ep-webgpu/python/build_wheel.py
@@ -0,0 +1,162 @@
+#!/usr/bin/env python3
+"""Build a wheel for the onnxruntime-ep-webgpu package.
+
+Combines pre-built plugin EP binaries with the Python package source to produce
+a platform-specific wheel.
+
+Usage:
+    python build_wheel.py --binary_dir <path> --version <ver> --output_dir <path>
+"""
+
+import argparse
+import platform
+import shutil
+import subprocess
+import sys
+import tempfile
+from pathlib import Path
+
+SCRIPT_DIR = Path(__file__).parent
+
+# Glob patterns (here: exact file names) of binaries copied from --binary_dir into the package when present
+BINARY_PATTERNS = [
+    "onnxruntime_providers_webgpu.dll",
+    "libonnxruntime_providers_webgpu.so",
+    "libonnxruntime_providers_webgpu.dylib",
+    # DXC dependencies (Windows)
+    "dxil.dll",
+    "dxcompiler.dll",
+    # Dawn shared library (if built as shared)
+    "webgpu_dawn.dll",
+    "libwebgpu_dawn.so",
+    "libwebgpu_dawn.dylib",
+]
+
+# Libraries passed to "auditwheel repair --exclude" so they are not bundled (user-provided drivers)
+AUDITWHEEL_EXCLUDE = [
+    "libvulkan.so.1",
+]
+
+
+def prepare_staging_dir(staging_dir: Path, binary_dir: Path, version: str):
+    """Stage package sources plus binaries from binary_dir, stamp version; exits with status 1 on error."""
+    staging_dir.mkdir(parents=True, exist_ok=True)
+
+    # Copy only the files needed to build the wheel
+    shutil.copy2(SCRIPT_DIR / "pyproject.toml", staging_dir / "pyproject.toml")
+    shutil.copy2(SCRIPT_DIR / "setup.py", staging_dir / "setup.py")
+    shutil.copytree(SCRIPT_DIR / "onnxruntime_ep_webgpu", staging_dir / "onnxruntime_ep_webgpu")
+
+    # Copy every binary matching BINARY_PATTERNS into the package directory
+    package_dir = staging_dir / "onnxruntime_ep_webgpu"
+    copied = []
+    for pattern in BINARY_PATTERNS:
+        for src in binary_dir.glob(pattern):
+            dst = package_dir / src.name
+            print(f"Copying {src} -> {dst}")
+            shutil.copy2(src, dst)
+            copied.append(dst)
+    # Helper DLLs alone are not enough: the wheel is unusable without the plugin EP library itself.
+    if not any("onnxruntime_providers_webgpu" in path.name for path in copied):
+        print(f"ERROR: No plugin binaries found in {binary_dir}", file=sys.stderr)
+        print(f"Looked for: {BINARY_PATTERNS}", file=sys.stderr)
+        sys.exit(1)
+
+    # Stamp the version in pyproject.toml
+    pyproject_path = staging_dir / "pyproject.toml"
+    content = pyproject_path.read_text(encoding="utf-8")
+    placeholder = 'version = "VERSION_PLACEHOLDER"'
+    if placeholder not in content:
+        print(f"ERROR: Version placeholder not found in pyproject.toml. Expected: {placeholder}", file=sys.stderr)
+        sys.exit(1)
+    pyproject_path.write_text(content.replace(placeholder, f'version = "{version}"'), encoding="utf-8")
+
+
+def build_wheel(source_dir: Path, wheel_dir: Path):
+    """Run ``pip wheel`` on source_dir and write the result to wheel_dir.
+
+    wheel_dir is created if missing. pip is invoked with --no-deps and
+    --no-build-isolation through the current interpreter (sys.executable).
+    subprocess.CalledProcessError propagates if pip exits with a non-zero status.
+    """
+    wheel_dir.mkdir(parents=True, exist_ok=True)
+
+    pip_wheel = [sys.executable, "-m", "pip", "wheel"]
+    pip_wheel += [str(source_dir), "--wheel-dir", str(wheel_dir)]
+    pip_wheel += ["--no-deps", "--no-build-isolation"]
+
+    command_line = " ".join(pip_wheel)
+    print(f"Running: {command_line}")
+    subprocess.check_call(pip_wheel)
+
+
+def auditwheel_repair(wheel_dir: Path):
+    """Run auditwheel repair on Linux to produce a manylinux-compliant wheel.
+
+    No-op on other platforms or when wheel_dir contains no matching wheel.
+    """
+    if platform.system() != "Linux":
+        return
+
+    # Path.glob() returns an always-truthy generator; materialize it before the emptiness check.
+    raw_wheel_list = list(wheel_dir.glob("onnxruntime_ep_webgpu-*.whl"))
+    if not raw_wheel_list:
+        return
+
+    with tempfile.TemporaryDirectory() as repaired_dir_name:
+        repaired_dir = Path(repaired_dir_name)
+
+        for wheel in raw_wheel_list:
+            cmd = [sys.executable, "-m", "auditwheel", "repair", str(wheel), "--wheel-dir", str(repaired_dir)]
+            for lib in AUDITWHEEL_EXCLUDE:
+                cmd.extend(["--exclude", lib])
+            print(f"Running: {' '.join(cmd)}")
+            subprocess.check_call(cmd)
+            # Remove the raw wheel so only the repaired one remains
+            wheel.unlink()
+
+        # Move repaired wheels into wheel_dir
+        for repaired_wheel in repaired_dir.glob("*.whl"):
+            repaired_wheel.replace(wheel_dir / repaired_wheel.name)
+
+
+def collect_wheels(wheel_dir: Path, output_dir: Path):
+    """Copy built wheels to output_dir; print an error and exit with status 1 if none was produced."""
+    wheels = list(wheel_dir.glob("onnxruntime_ep_webgpu-*.whl"))  # a bare generator is always truthy
+    if not wheels:
+        print("ERROR: No wheel was produced", file=sys.stderr)
+        sys.exit(1)
+
+    output_dir.mkdir(parents=True, exist_ok=True)
+
+    for wheel in wheels:
+        dest = output_dir / wheel.name
+        shutil.copy2(wheel, dest)
+        print(f"Built wheel: {dest}")
+
+
+def main():
+    """Parse the command line, then stage, build, repair (Linux only) and collect the wheel."""
+    parser = argparse.ArgumentParser(description="Build onnxruntime-ep-webgpu wheel")
+    binary_dir_help = "Directory containing the built plugin EP binaries"
+    parser.add_argument("--binary_dir", required=True, type=Path, help=binary_dir_help)
+    parser.add_argument("--version", required=True, help="Package version string (PEP 440 format)")
+    parser.add_argument("--output_dir", required=True, type=Path, help="Directory to place the built wheel")
+    options = parser.parse_args()
+
+    if not options.binary_dir.is_dir():
+        print(f"ERROR: Binary directory does not exist: {options.binary_dir}", file=sys.stderr)
+        sys.exit(1)
+
+    # Everything is built in a scratch directory that is removed on exit.
+    with tempfile.TemporaryDirectory(prefix="ort_webgpu_wheel_") as scratch:
+        scratch_root = Path(scratch)
+        staging_dir, wheel_dir = scratch_root / "package", scratch_root / "wheels"
+        prepare_staging_dir(staging_dir, options.binary_dir, options.version)
+        build_wheel(staging_dir, wheel_dir)
+        auditwheel_repair(wheel_dir)
+        collect_wheels(wheel_dir, options.output_dir)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/plugin-ep-webgpu/python/onnxruntime_ep_webgpu/README.md b/plugin-ep-webgpu/python/onnxruntime_ep_webgpu/README.md
@@ -0,0 +1,31 @@
+# ONNX Runtime WebGPU Plugin Execution Provider
+
+WebGPU Execution Provider plugin for ONNX Runtime. Install alongside `onnxruntime` to enable WebGPU acceleration.
+
+## Installation
+
+```bash
+pip install onnxruntime-ep-webgpu
+```
+
+## Usage
+
+```python
+import onnxruntime as ort
+import onnxruntime_ep_webgpu as webgpu_ep
+
+# Register the plugin EP library with ONNX Runtime
+ort.register_execution_provider_library("webgpu", webgpu_ep.get_library_path())
+
+# Discover WebGPU devices
+all_devices = ort.get_ep_devices()
+webgpu_devices = [d for d in all_devices if d.ep_name == webgpu_ep.get_ep_name()]
+
+# Create a session using the WebGPU EP
+sess_options = ort.SessionOptions()
+sess_options.add_provider_for_devices(webgpu_devices, {})
+session = ort.InferenceSession("model.onnx", sess_options=sess_options)
+
+# Run inference (replace "input" and input_data with your model's input name and a matching NumPy array)
+output = session.run(None, {"input": input_data})
+```
diff --git a/plugin-ep-webgpu/python/onnxruntime_ep_webgpu/__init__.py b/plugin-ep-webgpu/python/onnxruntime_ep_webgpu/__init__.py
@@ -0,0 +1,43 @@
+"""ONNX Runtime WebGPU Plugin Execution Provider Python Package.
+
+Provides helper functions to locate the plugin EP shared library and
+retrieve the EP name for registration with ONNX Runtime.
+"""
+
+from __future__ import annotations
+
+import pathlib
+
+__all__ = [
+    "get_ep_name",
+    "get_ep_names",
+    "get_library_path",
+]
+
+_module_dir = pathlib.Path(__file__).parent
+
+
+def get_library_path() -> str:
+    """Return the path of the single bundled plugin EP library; raise RuntimeError if zero or several exist."""
+    library_names = (
+        "onnxruntime_providers_webgpu.dll",
+        "libonnxruntime_providers_webgpu.so",
+        "libonnxruntime_providers_webgpu.dylib",
+    )
+    present = [name for name in library_names if (_module_dir / name).is_file()]
+    if len(present) == 1:
+        return str(_module_dir / present[0])
+    raise RuntimeError(
+        f"Expected exactly one WebGPU plugin EP library in {_module_dir}, "
+        f"found {len(present)}: {present}"
+    )
+
+
+def get_ep_name() -> str:
+    """Return the name identifying the WebGPU EP (e.g. to filter ``onnxruntime.get_ep_devices()`` by ``ep_name``)."""
+    return "WebGpuExecutionProvider"
+
+
+def get_ep_names() -> list[str]:
+    """Return the names of all EPs provided by this plugin (currently only the one from ``get_ep_name()``)."""
+    return [get_ep_name()]