From 63e9aa0723650998d660010f036e1da5b6e9fef8 Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Fri, 10 Apr 2026 15:00:19 -0700 Subject: [PATCH 01/48] Add WebGPU plugin EP Python package infrastructure Add standalone onnxruntime-ep-webgpu Python package that bundles the WebGPU plugin EP native binary (+ DXC deps on Windows). The package provides get_library_path() and get_ep_name() helpers for registering the EP with ONNX Runtime. New files in plugin-ep-webgpu/: VERSION_NUMBER, pyproject.toml, setup.py, __init__.py, build_wheel.py (handles binary copying, version stamping, auditwheel repair on Linux, and wheel verification), requirements-build-wheel.txt, and a smoke test that validates import, EP registration, and inference. Pipeline changes: added Python_Package (CPU) and Python_Test (GPU) jobs to each platform stage (Windows, Linux, macOS). Added PluginPythonPackageVersion (PEP 440) output to set-plugin-build-variables-step.yml, sourced from plugin-ep-webgpu/VERSION_NUMBER. 
--- plugin-ep-webgpu/VERSION_NUMBER | 1 + plugin-ep-webgpu/python/build_wheel.py | 158 ++++++++++++++++++ .../python/onnxruntime_ep_webgpu/__init__.py | 39 +++++ plugin-ep-webgpu/python/pyproject.toml | 17 ++ .../python/requirements-build-wheel.txt | 5 + plugin-ep-webgpu/python/setup.py | 18 ++ .../python/test/test_webgpu_plugin_ep.py | 126 ++++++++++++++ .../stages/plugin-linux-webgpu-stage.yml | 70 ++++++++ .../stages/plugin-mac-webgpu-stage.yml | 74 ++++++++ .../stages/plugin-win-webgpu-stage.yml | 76 +++++++++ .../set-plugin-build-variables-step.yml | 14 +- 11 files changed, 596 insertions(+), 2 deletions(-) create mode 100644 plugin-ep-webgpu/VERSION_NUMBER create mode 100644 plugin-ep-webgpu/python/build_wheel.py create mode 100644 plugin-ep-webgpu/python/onnxruntime_ep_webgpu/__init__.py create mode 100644 plugin-ep-webgpu/python/pyproject.toml create mode 100644 plugin-ep-webgpu/python/requirements-build-wheel.txt create mode 100644 plugin-ep-webgpu/python/setup.py create mode 100644 plugin-ep-webgpu/python/test/test_webgpu_plugin_ep.py diff --git a/plugin-ep-webgpu/VERSION_NUMBER b/plugin-ep-webgpu/VERSION_NUMBER new file mode 100644 index 0000000000000..5ff8c4f5d2ad2 --- /dev/null +++ b/plugin-ep-webgpu/VERSION_NUMBER @@ -0,0 +1 @@ +1.26.0 diff --git a/plugin-ep-webgpu/python/build_wheel.py b/plugin-ep-webgpu/python/build_wheel.py new file mode 100644 index 0000000000000..69774db0b630c --- /dev/null +++ b/plugin-ep-webgpu/python/build_wheel.py @@ -0,0 +1,158 @@ +#!/usr/bin/env python3 +"""Build a wheel for the onnxruntime-ep-webgpu package. + +This script copies plugin EP binaries from a build directory into the package +source tree, sets the version in pyproject.toml, builds the wheel, optionally +runs auditwheel repair (Linux), verifies the output, and cleans up. 
#!/usr/bin/env python3
"""Build a wheel for the onnxruntime-ep-webgpu package.

This script copies plugin EP binaries from a build directory into the package
source tree, sets the version in pyproject.toml, builds the wheel, optionally
runs auditwheel repair (Linux), verifies the output, and cleans up.

Usage:
    python build_wheel.py --binary_dir <dir> --version <version> --output_dir <dir>
"""

import argparse
import glob
import os
import platform
import shutil
import subprocess
import sys
from pathlib import Path

SCRIPT_DIR = Path(__file__).parent
PACKAGE_DIR = SCRIPT_DIR / "onnxruntime_ep_webgpu"

# Patterns for binaries to include in the package
BINARY_PATTERNS = [
    "onnxruntime_providers_webgpu.dll",
    "libonnxruntime_providers_webgpu.so",
    "libonnxruntime_providers_webgpu.dylib",
    # DXC dependencies (Windows)
    "dxil.dll",
    "dxcompiler.dll",
    # Dawn shared library (if built as shared)
    "webgpu_dawn.dll",
    "libwebgpu_dawn.so",
    "libwebgpu_dawn.dylib",
]

# Libraries to exclude from auditwheel bundling (user-provided drivers)
AUDITWHEEL_EXCLUDE = [
    "libvulkan.so.1",
]


def copy_binaries(binary_dir: Path) -> list[Path]:
    """Copy plugin binaries from the build directory into the package directory.

    Exits with status 1 if nothing matching BINARY_PATTERNS is found.
    Returns the list of destination paths so they can be removed later.
    """
    staged: list[Path] = []
    for name_pattern in BINARY_PATTERNS:
        for source in binary_dir.glob(name_pattern):
            target = PACKAGE_DIR / source.name
            print(f"Copying {source} -> {target}")
            shutil.copy2(source, target)
            staged.append(target)
    if not staged:
        print(f"ERROR: No plugin binaries found in {binary_dir}", file=sys.stderr)
        print(f"Looked for: {BINARY_PATTERNS}", file=sys.stderr)
        sys.exit(1)
    return staged


def set_version(version: str) -> str:
    """Set the version in pyproject.toml.

    Returns the original file content so cleanup() can restore it.
    """
    pyproject_path = SCRIPT_DIR / "pyproject.toml"
    previous_text = pyproject_path.read_text(encoding="utf-8")
    stamped_text = previous_text.replace('version = "0.0.0"', f'version = "{version}"')
    if stamped_text == previous_text:
        # The placeholder is expected; warn (but continue) if it is missing.
        print("WARNING: Could not find version placeholder in pyproject.toml", file=sys.stderr)
    pyproject_path.write_text(stamped_text, encoding="utf-8")
    return previous_text


def build_wheel(output_dir: Path):
    """Build the wheel using pip into *output_dir*."""
    output_dir.mkdir(parents=True, exist_ok=True)
    cmd = [sys.executable, "-m", "pip", "wheel", str(SCRIPT_DIR)]
    # --no-build-isolation: build deps come from requirements-build-wheel.txt.
    cmd += ["--wheel-dir", str(output_dir), "--no-deps", "--no-build-isolation"]
    print(f"Running: {' '.join(cmd)}")
    subprocess.check_call(cmd)


def auditwheel_repair(output_dir: Path):
    """Run auditwheel repair on Linux to produce a manylinux-compliant wheel.

    No-op on other platforms or when no wheel is present.
    """
    if platform.system() != "Linux":
        return

    raw_wheels = glob.glob(str(output_dir / "onnxruntime_ep_webgpu-*.whl"))
    if not raw_wheels:
        return

    repaired_dir = output_dir / "_repaired"
    repaired_dir.mkdir(parents=True, exist_ok=True)

    exclude_args: list[str] = []
    for lib in AUDITWHEEL_EXCLUDE:
        exclude_args += ["--exclude", lib]

    for raw in raw_wheels:
        cmd = [sys.executable, "-m", "auditwheel", "repair", raw,
               "--wheel-dir", str(repaired_dir), *exclude_args]
        print(f"Running: {' '.join(cmd)}")
        subprocess.check_call(cmd)

    # Replace raw wheels with repaired ones
    for raw in raw_wheels:
        os.remove(raw)
    for fixed_wheel in repaired_dir.glob("*.whl"):
        shutil.move(str(fixed_wheel), str(output_dir / fixed_wheel.name))
    repaired_dir.rmdir()


def verify_wheel(output_dir: Path):
    """Verify that at least one wheel was produced; exit(1) otherwise."""
    produced = glob.glob(str(output_dir / "onnxruntime_ep_webgpu-*.whl"))
    if not produced:
        print("ERROR: No wheel was produced", file=sys.stderr)
        sys.exit(1)
    for w in produced:
        print(f"Built wheel: {w}")


def cleanup(copied_files: list[Path], original_pyproject: str):
    """Remove copied binaries and restore pyproject.toml."""
    for staged in copied_files:
        if staged.exists():
            staged.unlink()
            print(f"Cleaned up: {staged}")
    pyproject_path = SCRIPT_DIR / "pyproject.toml"
    pyproject_path.write_text(original_pyproject, encoding="utf-8")


def main():
    parser = argparse.ArgumentParser(description="Build onnxruntime-ep-webgpu wheel")
    parser.add_argument("--binary_dir", required=True, type=Path,
                        help="Directory containing the built plugin EP binaries")
    parser.add_argument("--version", required=True,
                        help="Package version string (PEP 440 format)")
    parser.add_argument("--output_dir", required=True, type=Path,
                        help="Directory to place the built wheel")
    args = parser.parse_args()

    if not args.binary_dir.is_dir():
        print(f"ERROR: Binary directory does not exist: {args.binary_dir}", file=sys.stderr)
        sys.exit(1)

    # Stage binaries and stamp the version, then always clean up afterwards
    # so repeated local builds start from a pristine source tree.
    copied_files = copy_binaries(args.binary_dir)
    original_pyproject = set_version(args.version)
    try:
        build_wheel(args.output_dir)
        auditwheel_repair(args.output_dir)
        verify_wheel(args.output_dir)
    finally:
        cleanup(copied_files, original_pyproject)


if __name__ == "__main__":
    main()
+""" + +from __future__ import annotations + +import pathlib + +__all__ = ["get_library_path", "get_ep_name", "get_ep_names"] + +_module_dir = pathlib.Path(__file__).parent + + +def get_library_path() -> str: + """Return the path to the WebGPU plugin EP shared library.""" + candidate_paths = [ + _module_dir / "onnxruntime_providers_webgpu.dll", + _module_dir / "libonnxruntime_providers_webgpu.so", + _module_dir / "libonnxruntime_providers_webgpu.dylib", + ] + paths = [p for p in candidate_paths if p.is_file()] + if len(paths) != 1: + raise RuntimeError( + f"Expected exactly one WebGPU plugin EP library in {_module_dir}, " + f"found {len(paths)}: {[p.name for p in paths]}" + ) + return str(paths[0]) + + +def get_ep_name() -> str: + """Return the WebGPU Execution Provider name.""" + return "WebGpuExecutionProvider" + + +def get_ep_names() -> list[str]: + """Return a list of EP names provided by this plugin.""" + return [get_ep_name()] diff --git a/plugin-ep-webgpu/python/pyproject.toml b/plugin-ep-webgpu/python/pyproject.toml new file mode 100644 index 0000000000000..9bbe7b4eaf63d --- /dev/null +++ b/plugin-ep-webgpu/python/pyproject.toml @@ -0,0 +1,17 @@ +[build-system] +requires = ["setuptools>=68.0", "wheel"] +build-backend = "setuptools.build_meta" + +[project] +name = "onnxruntime-ep-webgpu" +version = "0.0.0" # Replaced at build time by build_wheel.py +description = "ONNX Runtime WebGPU Plugin Execution Provider" +readme = {text = "WebGPU Execution Provider plugin for ONNX Runtime. 
"""Minimal setup.py to mark the wheel as platform-specific (non-pure).

pyproject.toml alone cannot express the non-pure wheel requirement, so this
companion setup.py defines a BinaryDistribution that ensures the wheel gets
the correct platform tag (e.g., win_amd64, manylinux_x86_64) instead of
py3-none-any.
"""

from setuptools import setup
from setuptools.dist import Distribution


class BinaryDistribution(Distribution):
    """Distribution that forces a platform-specific wheel tag.

    Claiming extension modules makes bdist_wheel treat the package as
    non-pure even though the bundled binaries are plain shared libraries.
    """

    def has_ext_modules(self):
        return True


setup(distclass=BinaryDistribution)
#!/usr/bin/env python3
"""Smoke test for the onnxruntime-ep-webgpu Python package.

Tests:
1. Package import and library path resolution
2. EP registration with ONNX Runtime
3. Device discovery
4. Inference with a simple Mul model (requires WebGPU-capable hardware)

The inference test is skipped gracefully if no WebGPU device is available
(e.g., on CPU-only build agents).
"""

import sys
import tempfile
from pathlib import Path

import numpy as np
import onnx
import onnxruntime as ort
from onnx import TensorProto, helper


def create_mul_model() -> str:
    """Create a simple Mul model and return the path to the saved .onnx file."""
    inputs = [
        helper.make_tensor_value_info("x", TensorProto.FLOAT, [2, 3]),
        helper.make_tensor_value_info("y", TensorProto.FLOAT, [2, 3]),
    ]
    outputs = [helper.make_tensor_value_info("z", TensorProto.FLOAT, [2, 3])]
    node = helper.make_node("Mul", inputs=["x", "y"], outputs=["z"])

    graph = helper.make_graph([node], "mul_graph", inputs, outputs)
    model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
    model.ir_version = 7

    model_path = Path(tempfile.mkdtemp()) / "mul.onnx"
    onnx.save(model, str(model_path))
    return str(model_path)


def test_import_and_library_path():
    """Test that the package imports and the library path is valid."""
    import onnxruntime_ep_webgpu as webgpu_ep

    lib_path = webgpu_ep.get_library_path()
    assert Path(lib_path).is_file(), f"Library path does not exist: {lib_path}"
    print(f"OK: Library path: {lib_path}")

    ep_name = webgpu_ep.get_ep_name()
    assert ep_name == "WebGpuExecutionProvider", f"Unexpected EP name: {ep_name}"
    print(f"OK: EP name: {ep_name}")

    ep_names = webgpu_ep.get_ep_names()
    assert ep_names == ["WebGpuExecutionProvider"], f"Unexpected EP names: {ep_names}"
    print(f"OK: EP names: {ep_names}")


def test_registration_and_inference():
    """Test EP registration, device discovery, and inference."""
    import onnxruntime_ep_webgpu as webgpu_ep

    lib_path = webgpu_ep.get_library_path()
    ep_name = webgpu_ep.get_ep_name()
    registration_name = "webgpu_plugin_test"

    # Register the plugin EP
    ort.register_execution_provider_library(registration_name, lib_path)
    print(f"OK: Registered EP library as '{registration_name}'")

    try:
        # Discover devices
        webgpu_devices = [d for d in ort.get_ep_devices() if d.ep_name == ep_name]
        print(f"Found {len(webgpu_devices)} WebGPU device(s)")

        if not webgpu_devices:
            print("SKIP: No WebGPU devices available — skipping inference test")
            return

        # Create session with WebGPU EP
        sess_options = ort.SessionOptions()
        sess_options.add_provider_for_devices(webgpu_devices, {})
        assert sess_options.has_providers(), "SessionOptions should have providers after add_provider_for_devices"
        print("OK: Session options configured with WebGPU EP")

        sess = ort.InferenceSession(create_mul_model(), sess_options=sess_options)
        print("OK: InferenceSession created")

        # Run inference and compare against the NumPy reference result.
        lhs = np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]], dtype=np.float32)
        rhs = np.array([[2.0, 3.0, 4.0], [5.0, 6.0, 7.0]], dtype=np.float32)

        outputs = sess.run(None, {"x": lhs, "y": rhs})

        np.testing.assert_allclose(outputs[0], lhs * rhs, rtol=1e-5, atol=1e-5)
        print("OK: Inference result matches expected output")

        del sess
        print("OK: Session released")

    finally:
        ort.unregister_execution_provider_library(registration_name)
        print(f"OK: Unregistered EP library '{registration_name}'")


def main():
    print("=== WebGPU Plugin EP Python Package Test ===")

    print("\n--- Test 1: Import and library path ---")
    test_import_and_library_path()

    print("\n--- Test 2: Registration and inference ---")
    test_registration_and_inference()

    print("\n=== All tests passed ===")


if __name__ == "__main__":
    try:
        main()
    except Exception as e:
        print(f"\nFAILED: {e}", file=sys.stderr)
        sys.exit(1)
b/tools/ci_build/github/azure-pipelines/stages/plugin-linux-webgpu-stage.yml index 00e716ff3af26..bae9df8ee86c6 100644 --- a/tools/ci_build/github/azure-pipelines/stages/plugin-linux-webgpu-stage.yml +++ b/tools/ci_build/github/azure-pipelines/stages/plugin-linux-webgpu-stage.yml @@ -94,3 +94,73 @@ stages: vstsFeedPackagePublish: 'onnxruntime-plugin-ep-webgpu-linux-x64' versionOption: custom versionPublish: '$(PluginUniversalPackageVersion)' + + # Python package build job + - job: Linux_plugin_webgpu_x64_Python_Package + dependsOn: Linux_plugin_webgpu_x64_Build + timeoutInMinutes: 30 + workspace: + clean: all + pool: + name: ${{ parameters.machine_pool }} + os: linux + templateContext: + outputs: + - output: pipelineArtifact + targetPath: $(Build.ArtifactStagingDirectory)/python + artifactName: webgpu_plugin_python_linux_x64 + variables: + - template: ../templates/common-variables.yml + steps: + - checkout: self + clean: true + submodules: none + + - template: ../templates/set-plugin-build-variables-step.yml + parameters: + package_version: ${{ parameters.package_version }} + + - task: DownloadPipelineArtifact@2 + displayName: 'Download plugin build artifacts' + inputs: + artifactName: webgpu_plugin_linux_x64 + targetPath: '$(Build.BinariesDirectory)/plugin_artifacts' + + - script: | + set -e -x + python3 -m pip install -r "$(Build.SourcesDirectory)/plugin-ep-webgpu/python/requirements-build-wheel.txt" + python3 "$(Build.SourcesDirectory)/plugin-ep-webgpu/python/build_wheel.py" \ + --binary_dir "$(Build.BinariesDirectory)/plugin_artifacts/bin" \ + --version "$(PluginPythonPackageVersion)" \ + --output_dir "$(Build.ArtifactStagingDirectory)/python" + displayName: 'Build Python wheel' + + # Python package test job + - job: Linux_plugin_webgpu_x64_Python_Test + dependsOn: Linux_plugin_webgpu_x64_Python_Package + timeoutInMinutes: 30 + workspace: + clean: all + pool: + name: Onnxruntime-Linux-GPU-A10 + os: linux + steps: + - checkout: self + clean: true + submodules: 
none + + - task: DownloadPipelineArtifact@2 + displayName: 'Download Python wheel' + inputs: + artifactName: webgpu_plugin_python_linux_x64 + targetPath: '$(Build.BinariesDirectory)/python_wheel' + + - script: | + set -e -x + python3 -m venv "$(Build.BinariesDirectory)/test_venv" + source "$(Build.BinariesDirectory)/test_venv/bin/activate" + python3 -m pip install onnxruntime onnx numpy + wheel=$(find "$(Build.BinariesDirectory)/python_wheel" -name "onnxruntime_ep_webgpu-*.whl" | head -1) + python3 -m pip install "$wheel" + python3 "$(Build.SourcesDirectory)/plugin-ep-webgpu/python/test/test_webgpu_plugin_ep.py" + displayName: 'Install and test Python package' diff --git a/tools/ci_build/github/azure-pipelines/stages/plugin-mac-webgpu-stage.yml b/tools/ci_build/github/azure-pipelines/stages/plugin-mac-webgpu-stage.yml index 16e16e54fd236..23d70d0f9c459 100644 --- a/tools/ci_build/github/azure-pipelines/stages/plugin-mac-webgpu-stage.yml +++ b/tools/ci_build/github/azure-pipelines/stages/plugin-mac-webgpu-stage.yml @@ -109,3 +109,77 @@ stages: vstsFeedPackagePublish: 'onnxruntime-plugin-ep-webgpu-macos-arm64' versionOption: custom versionPublish: '$(PluginUniversalPackageVersion)' + + # Python package build job + - job: MacOS_plugin_webgpu_arm64_Python_Package + dependsOn: MacOS_plugin_webgpu_arm64_Build + timeoutInMinutes: 30 + workspace: + clean: all + pool: + name: AcesShared + os: macOS + demands: + - ImageOverride -equals ACES_VM_SharedPool_Sequoia + templateContext: + outputs: + - output: pipelineArtifact + targetPath: $(Build.ArtifactStagingDirectory)/python + artifactName: webgpu_plugin_python_macos_arm64 + variables: + - template: ../templates/common-variables.yml + steps: + - checkout: self + clean: true + submodules: none + + - template: ../templates/set-plugin-build-variables-step.yml + parameters: + package_version: ${{ parameters.package_version }} + + - task: DownloadPipelineArtifact@2 + displayName: 'Download plugin build artifacts' + inputs: + 
artifactName: webgpu_plugin_macos_arm64 + targetPath: '$(Build.BinariesDirectory)/plugin_artifacts' + + - script: | + set -e -x + python3 -m pip install -r "$(Build.SourcesDirectory)/plugin-ep-webgpu/python/requirements-build-wheel.txt" + python3 "$(Build.SourcesDirectory)/plugin-ep-webgpu/python/build_wheel.py" \ + --binary_dir "$(Build.BinariesDirectory)/plugin_artifacts/bin" \ + --version "$(PluginPythonPackageVersion)" \ + --output_dir "$(Build.ArtifactStagingDirectory)/python" + displayName: 'Build Python wheel' + + # Python package test job + - job: MacOS_plugin_webgpu_arm64_Python_Test + dependsOn: MacOS_plugin_webgpu_arm64_Python_Package + timeoutInMinutes: 30 + workspace: + clean: all + pool: + name: AcesShared + os: macOS + demands: + - ImageOverride -equals ACES_VM_SharedPool_Sequoia + steps: + - checkout: self + clean: true + submodules: none + + - task: DownloadPipelineArtifact@2 + displayName: 'Download Python wheel' + inputs: + artifactName: webgpu_plugin_python_macos_arm64 + targetPath: '$(Build.BinariesDirectory)/python_wheel' + + - script: | + set -e -x + python3 -m venv "$(Build.BinariesDirectory)/test_venv" + source "$(Build.BinariesDirectory)/test_venv/bin/activate" + python3 -m pip install onnxruntime onnx numpy + wheel=$(find "$(Build.BinariesDirectory)/python_wheel" -name "onnxruntime_ep_webgpu-*.whl" | head -1) + python3 -m pip install "$wheel" + python3 "$(Build.SourcesDirectory)/plugin-ep-webgpu/python/test/test_webgpu_plugin_ep.py" + displayName: 'Install and test Python package' diff --git a/tools/ci_build/github/azure-pipelines/stages/plugin-win-webgpu-stage.yml b/tools/ci_build/github/azure-pipelines/stages/plugin-win-webgpu-stage.yml index 352ae77544e93..7d18b49e1a49f 100644 --- a/tools/ci_build/github/azure-pipelines/stages/plugin-win-webgpu-stage.yml +++ b/tools/ci_build/github/azure-pipelines/stages/plugin-win-webgpu-stage.yml @@ -255,3 +255,79 @@ stages: vstsFeedPackagePublish: 'onnxruntime-plugin-ep-webgpu-win-${{ 
parameters.arch }}' versionOption: custom versionPublish: '$(PluginUniversalPackageVersion)' + + # Python package build job + - job: Win_plugin_webgpu_${{ parameters.arch }}_Python_Package + dependsOn: Win_plugin_webgpu_${{ parameters.arch }}_Build + timeoutInMinutes: 30 + workspace: + clean: all + pool: + name: onnxruntime-Win-CPU-VS2022-Latest + os: windows + templateContext: + outputs: + - output: pipelineArtifact + targetPath: '$(Build.ArtifactStagingDirectory)\python' + artifactName: webgpu_plugin_python_win_${{ parameters.arch }} + variables: + - template: ../templates/common-variables.yml + steps: + - checkout: self + clean: true + submodules: none + + - template: ../templates/set-plugin-build-variables-step.yml + parameters: + package_version: ${{ parameters.package_version }} + + - task: DownloadPipelineArtifact@2 + displayName: 'Download plugin build artifacts' + inputs: + artifactName: webgpu_plugin_win_${{ parameters.arch }} + targetPath: '$(Build.BinariesDirectory)\plugin_artifacts' + + - task: PowerShell@2 + displayName: 'Build Python wheel' + inputs: + targetType: inline + pwsh: true + script: | + python -m pip install -r "$(Build.SourcesDirectory)\plugin-ep-webgpu\python\requirements-build-wheel.txt" + python "$(Build.SourcesDirectory)\plugin-ep-webgpu\python\build_wheel.py" ` + --binary_dir "$(Build.BinariesDirectory)\plugin_artifacts\bin" ` + --version "$(PluginPythonPackageVersion)" ` + --output_dir "$(Build.ArtifactStagingDirectory)\python" + + # Python package test job + - job: Win_plugin_webgpu_${{ parameters.arch }}_Python_Test + dependsOn: Win_plugin_webgpu_${{ parameters.arch }}_Python_Package + timeoutInMinutes: 30 + workspace: + clean: all + pool: + name: onnxruntime-Win2022-VS2022-webgpu-A10 + os: windows + steps: + - checkout: self + clean: true + submodules: none + + - task: DownloadPipelineArtifact@2 + displayName: 'Download Python wheel' + inputs: + artifactName: webgpu_plugin_python_win_${{ parameters.arch }} + targetPath: 
'$(Build.BinariesDirectory)\python_wheel' + + - task: PowerShell@2 + displayName: 'Install and test Python package' + inputs: + targetType: inline + pwsh: true + script: | + python -m venv "$(Build.BinariesDirectory)\test_venv" + & "$(Build.BinariesDirectory)\test_venv\Scripts\Activate.ps1" + python -m pip install onnxruntime onnx numpy + $wheel = (Get-ChildItem "$(Build.BinariesDirectory)\python_wheel\onnxruntime_ep_webgpu-*.whl")[0] + python -m pip install $wheel.FullName + python "$(Build.SourcesDirectory)\plugin-ep-webgpu\python\test\test_webgpu_plugin_ep.py" diff --git a/tools/ci_build/github/azure-pipelines/templates/set-plugin-build-variables-step.yml b/tools/ci_build/github/azure-pipelines/templates/set-plugin-build-variables-step.yml index 212eca44ae3ec..cbcf6ccbfa9d1 100644 --- a/tools/ci_build/github/azure-pipelines/templates/set-plugin-build-variables-step.yml +++ b/tools/ci_build/github/azure-pipelines/templates/set-plugin-build-variables-step.yml @@ -20,9 +20,9 @@ steps: package_version = "${{ parameters.package_version }}" src_root = os.environ.get("BUILD_SOURCESDIRECTORY", "") - version_file = os.path.join(src_root, "VERSION_NUMBER") + version_file = os.path.join(src_root, "plugin-ep-webgpu", "VERSION_NUMBER") if not os.path.isfile(version_file): - print("##vso[task.logissue type=error]Cannot find VERSION_NUMBER at: {}".format(version_file)) + print("##vso[task.logissue type=error]Cannot find plugin-ep-webgpu/VERSION_NUMBER at: {}".format(version_file)) sys.exit(1) with open(version_file, "r") as f: @@ -38,6 +38,7 @@ steps: if package_version == "release": version_string = original_ver universal_version = original_ver + python_version = original_ver elif package_version == "RC": # RC versioning is not yet implemented. 
Fail the build to prevent publishing @@ -60,6 +61,7 @@ steps: sys.exit(1) version_string = "{}-dev.{}+{}".format(original_ver, date_str, commit_sha) universal_version = "{}-dev.{}.{}".format(original_ver, date_str, commit_sha) + python_version = "{}.dev{}".format(original_ver, date_str) else: print("##vso[task.logissue type=error]Unknown package_version '{}'. Must be 'release', 'RC', or 'dev'.".format(package_version)) @@ -67,6 +69,7 @@ steps: print("Plugin package version string: {}".format(version_string)) print("Plugin universal package version string: {}".format(universal_version)) + print("Plugin Python package version string: {}".format(python_version)) # Validate semver 2.0.0 format semver_pattern = r"^(0|[1-9]\d*)\.(0|[1-9]\d*)\.(0|[1-9]\d*)(?:-((?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\.(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?(?:\+([0-9a-zA-Z-]+(?:\.[0-9a-zA-Z-]+)*))?$" @@ -80,6 +83,13 @@ steps: print("##vso[task.logissue type=error]Universal version string '{}' is not valid semver 1.0.0.".format(universal_version)) sys.exit(1) + # Validate Python version (PEP 440) + pep440_pattern = r"^([1-9][0-9]*!)?(0|[1-9][0-9]*)(\.(0|[1-9][0-9]*))*((a|b|rc)(0|[1-9][0-9]*))?(\.post(0|[1-9][0-9]*))?(\.dev(0|[1-9][0-9]*))?$" + if not re.match(pep440_pattern, python_version): + print("##vso[task.logissue type=error]Python version string '{}' is not valid PEP 440.".format(python_version)) + sys.exit(1) + print("##vso[task.setvariable variable=PluginPackageVersion]{}".format(version_string)) print("##vso[task.setvariable variable=PluginUniversalPackageVersion]{}".format(universal_version)) + print("##vso[task.setvariable variable=PluginPythonPackageVersion]{}".format(python_version)) print("##vso[task.setvariable variable=PluginEpVersionDefine]onnxruntime_PLUGIN_EP_VERSION={}".format(version_string)) From 1783f761df1c2d6d91933c68cde611dae885afd5 Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Fri, 10 Apr 2026 15:28:48 -0700 
Subject: [PATCH 02/48] Move inline readme to separate README.md file --- plugin-ep-webgpu/python/README.md | 31 ++++++++++++++++++++++++++ plugin-ep-webgpu/python/pyproject.toml | 2 +- 2 files changed, 32 insertions(+), 1 deletion(-) create mode 100644 plugin-ep-webgpu/python/README.md diff --git a/plugin-ep-webgpu/python/README.md b/plugin-ep-webgpu/python/README.md new file mode 100644 index 0000000000000..3200f0dd08ff0 --- /dev/null +++ b/plugin-ep-webgpu/python/README.md @@ -0,0 +1,31 @@ +# ONNX Runtime WebGPU Plugin Execution Provider + +WebGPU Execution Provider plugin for ONNX Runtime. Install alongside `onnxruntime` to enable WebGPU acceleration. + +## Installation + +```bash +pip install onnxruntime-ep-webgpu +``` + +## Usage + +```python +import onnxruntime as ort +import onnxruntime_ep_webgpu as webgpu_ep + +# Register the plugin EP library with ONNX Runtime +ort.register_execution_provider_library("webgpu", webgpu_ep.get_library_path()) + +# Discover WebGPU devices +all_devices = ort.get_ep_devices() +webgpu_devices = [d for d in all_devices if d.ep_name == webgpu_ep.get_ep_name()] + +# Create a session using the WebGPU EP +sess_options = ort.SessionOptions() +sess_options.add_provider_for_devices(webgpu_devices, {}) +session = ort.InferenceSession("model.onnx", sess_options=sess_options) + +# Run inference +output = session.run(None, {"input": input_data}) +``` diff --git a/plugin-ep-webgpu/python/pyproject.toml b/plugin-ep-webgpu/python/pyproject.toml index 9bbe7b4eaf63d..5c2a702b717c4 100644 --- a/plugin-ep-webgpu/python/pyproject.toml +++ b/plugin-ep-webgpu/python/pyproject.toml @@ -6,7 +6,7 @@ build-backend = "setuptools.build_meta" name = "onnxruntime-ep-webgpu" version = "0.0.0" # Replaced at build time by build_wheel.py description = "ONNX Runtime WebGPU Plugin Execution Provider" -readme = {text = "WebGPU Execution Provider plugin for ONNX Runtime. 
Install alongside onnxruntime to enable WebGPU acceleration.", content-type = "text/plain"} +readme = "README.md" license = {text = "MIT"} requires-python = ">=3.9" From cf75ea916ef2c900b638a0ee4f3fe145b7870f41 Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Fri, 10 Apr 2026 15:34:13 -0700 Subject: [PATCH 03/48] Separate user-facing and developer-facing READMEs Move user-facing README (installation, usage) into onnxruntime_ep_webgpu/ so it is bundled in the wheel and shown on PyPI. Add developer-facing README in plugin-ep-webgpu/python/ with build and test instructions. --- plugin-ep-webgpu/python/README.md | 67 +++++++++++++------ .../python/onnxruntime_ep_webgpu/README.md | 31 +++++++++ plugin-ep-webgpu/python/pyproject.toml | 2 +- 3 files changed, 80 insertions(+), 20 deletions(-) create mode 100644 plugin-ep-webgpu/python/onnxruntime_ep_webgpu/README.md diff --git a/plugin-ep-webgpu/python/README.md b/plugin-ep-webgpu/python/README.md index 3200f0dd08ff0..05d60ece3041b 100644 --- a/plugin-ep-webgpu/python/README.md +++ b/plugin-ep-webgpu/python/README.md @@ -1,31 +1,60 @@ -# ONNX Runtime WebGPU Plugin Execution Provider +# WebGPU Plugin EP Python Package — Build & Test -WebGPU Execution Provider plugin for ONNX Runtime. Install alongside `onnxruntime` to enable WebGPU acceleration. +This directory contains the packaging source for the `onnxruntime-ep-webgpu` Python package. 
-## Installation +## Prerequisites + +- Python 3.9+ +- Pre-built WebGPU plugin EP binaries (from CI or a local build) + +Install build dependencies: + +```bash +pip install -r requirements-build-wheel.txt +``` + +## Building the wheel ```bash -pip install onnxruntime-ep-webgpu +python build_wheel.py \ + --binary_dir \ + --version \ + --output_dir ``` -## Usage +Example: -```python -import onnxruntime as ort -import onnxruntime_ep_webgpu as webgpu_ep +```bash +python build_wheel.py \ + --binary_dir /build/Release \ + --version 1.26.0.dev20260410 \ + --output_dir ./dist +``` -# Register the plugin EP library with ONNX Runtime -ort.register_execution_provider_library("webgpu", webgpu_ep.get_library_path()) +The script will: +1. Copy plugin binaries into the package directory +2. Stamp the version in `pyproject.toml` +3. Build the wheel +4. Run `auditwheel repair` on Linux for manylinux compliance +5. Verify the wheel was produced +6. Clean up copied binaries and restore `pyproject.toml` -# Discover WebGPU devices -all_devices = ort.get_ep_devices() -webgpu_devices = [d for d in all_devices if d.ep_name == webgpu_ep.get_ep_name()] +## Testing -# Create a session using the WebGPU EP -sess_options = ort.SessionOptions() -sess_options.add_provider_for_devices(webgpu_devices, {}) -session = ort.InferenceSession("model.onnx", sess_options=sess_options) +Install the wheel and dependencies in a clean environment, then run the smoke test: -# Run inference -output = session.run(None, {"input": input_data}) +```bash +python -m venv test_venv +source test_venv/bin/activate # or test_venv\Scripts\Activate.ps1 on Windows +pip install onnxruntime onnx numpy +pip install dist/onnxruntime_ep_webgpu-*.whl +python test/test_webgpu_plugin_ep.py ``` + +The test validates import, EP registration, device discovery, and inference (requires WebGPU-capable hardware for the inference portion). 
+ +## Versioning + +The package version is derived from `plugin-ep-webgpu/VERSION_NUMBER` by the CI pipeline (`set-plugin-build-variables-step.yml`), which produces a PEP 440 version string: +- **Release**: `X.Y.Z` +- **Dev**: `X.Y.Z.devYYYYMMDD` diff --git a/plugin-ep-webgpu/python/onnxruntime_ep_webgpu/README.md b/plugin-ep-webgpu/python/onnxruntime_ep_webgpu/README.md new file mode 100644 index 0000000000000..3200f0dd08ff0 --- /dev/null +++ b/plugin-ep-webgpu/python/onnxruntime_ep_webgpu/README.md @@ -0,0 +1,31 @@ +# ONNX Runtime WebGPU Plugin Execution Provider + +WebGPU Execution Provider plugin for ONNX Runtime. Install alongside `onnxruntime` to enable WebGPU acceleration. + +## Installation + +```bash +pip install onnxruntime-ep-webgpu +``` + +## Usage + +```python +import onnxruntime as ort +import onnxruntime_ep_webgpu as webgpu_ep + +# Register the plugin EP library with ONNX Runtime +ort.register_execution_provider_library("webgpu", webgpu_ep.get_library_path()) + +# Discover WebGPU devices +all_devices = ort.get_ep_devices() +webgpu_devices = [d for d in all_devices if d.ep_name == webgpu_ep.get_ep_name()] + +# Create a session using the WebGPU EP +sess_options = ort.SessionOptions() +sess_options.add_provider_for_devices(webgpu_devices, {}) +session = ort.InferenceSession("model.onnx", sess_options=sess_options) + +# Run inference +output = session.run(None, {"input": input_data}) +``` diff --git a/plugin-ep-webgpu/python/pyproject.toml b/plugin-ep-webgpu/python/pyproject.toml index 5c2a702b717c4..ed30284310190 100644 --- a/plugin-ep-webgpu/python/pyproject.toml +++ b/plugin-ep-webgpu/python/pyproject.toml @@ -6,7 +6,7 @@ build-backend = "setuptools.build_meta" name = "onnxruntime-ep-webgpu" version = "0.0.0" # Replaced at build time by build_wheel.py description = "ONNX Runtime WebGPU Plugin Execution Provider" -readme = "README.md" +readme = "onnxruntime_ep_webgpu/README.md" license = {text = "MIT"} requires-python = ">=3.9" From 
1f869ced3cf85c1a4bc38e9dc4b5a7cc6f2fd4c4 Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Fri, 10 Apr 2026 16:42:08 -0700 Subject: [PATCH 04/48] Set Python >=3.11 and use py3-none wheel tag The package has no CPython extension modules, only pre-built native libraries, so a single wheel works across all Python versions. Override bdist_wheel.get_tag() to produce py3-none-{platform} instead of cp3XX-cp3XX-{platform}. --- plugin-ep-webgpu/python/pyproject.toml | 2 +- plugin-ep-webgpu/python/setup.py | 25 +++++++++++++++++++------ 2 files changed, 20 insertions(+), 7 deletions(-) diff --git a/plugin-ep-webgpu/python/pyproject.toml b/plugin-ep-webgpu/python/pyproject.toml index ed30284310190..abcde60ac07f4 100644 --- a/plugin-ep-webgpu/python/pyproject.toml +++ b/plugin-ep-webgpu/python/pyproject.toml @@ -8,7 +8,7 @@ version = "0.0.0" # Replaced at build time by build_wheel.py description = "ONNX Runtime WebGPU Plugin Execution Provider" readme = "onnxruntime_ep_webgpu/README.md" license = {text = "MIT"} -requires-python = ">=3.9" +requires-python = ">=3.11" [tool.setuptools.packages.find] include = ["onnxruntime_ep_webgpu*"] diff --git a/plugin-ep-webgpu/python/setup.py b/plugin-ep-webgpu/python/setup.py index 9844068aeecfd..4b5b4bcf83798 100644 --- a/plugin-ep-webgpu/python/setup.py +++ b/plugin-ep-webgpu/python/setup.py @@ -1,18 +1,31 @@ -"""Minimal setup.py to mark the wheel as platform-specific (non-pure). +"""Minimal setup.py to produce a platform-specific wheel. -pyproject.toml alone cannot express the non-pure wheel requirement, so this -companion setup.py defines a BinaryDistribution that ensures the wheel gets -the correct platform tag (e.g., win_amd64, manylinux_x86_64) instead of -py3-none-any. +The package contains pre-built native libraries (not CPython extension modules), +so the wheel tag should be py3-none-{platform} rather than cp3XX-cp3XX-{platform}. 
+This means a single wheel works across all supported Python versions. """ from setuptools import setup from setuptools.dist import Distribution +try: + from wheel.bdist_wheel import bdist_wheel + + class PlatformBdistWheel(bdist_wheel): + """Override wheel tags to py3-none-{platform}.""" + + def get_tag(self): + _, _, plat = super().get_tag() + return "py3", "none", plat + + cmdclass = {"bdist_wheel": PlatformBdistWheel} +except ImportError: + cmdclass = {} + class BinaryDistribution(Distribution): def has_ext_modules(self): return True -setup(distclass=BinaryDistribution) +setup(distclass=BinaryDistribution, cmdclass=cmdclass) From f3a71b261258884ece193e99b9a1287a7e577fca Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Fri, 10 Apr 2026 16:53:52 -0700 Subject: [PATCH 05/48] Remove unnecessary try/except guard for wheel import --- plugin-ep-webgpu/python/setup.py | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/plugin-ep-webgpu/python/setup.py b/plugin-ep-webgpu/python/setup.py index 4b5b4bcf83798..1408047fcb887 100644 --- a/plugin-ep-webgpu/python/setup.py +++ b/plugin-ep-webgpu/python/setup.py @@ -7,20 +7,15 @@ from setuptools import setup from setuptools.dist import Distribution +from wheel.bdist_wheel import bdist_wheel -try: - from wheel.bdist_wheel import bdist_wheel - class PlatformBdistWheel(bdist_wheel): - """Override wheel tags to py3-none-{platform}.""" +class PlatformBdistWheel(bdist_wheel): + """Override wheel tags to py3-none-{platform}.""" - def get_tag(self): - _, _, plat = super().get_tag() - return "py3", "none", plat - - cmdclass = {"bdist_wheel": PlatformBdistWheel} -except ImportError: - cmdclass = {} + def get_tag(self): + _, _, plat = super().get_tag() + return "py3", "none", plat class BinaryDistribution(Distribution): @@ -28,4 +23,4 @@ def has_ext_modules(self): return True -setup(distclass=BinaryDistribution, cmdclass=cmdclass) 
+setup(distclass=BinaryDistribution, cmdclass={"bdist_wheel": PlatformBdistWheel}) From 41b9ce45c7bf3d7a6a9edc77d65cfd2323c5dc80 Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Fri, 10 Apr 2026 17:38:34 -0700 Subject: [PATCH 06/48] Improve build_wheel.py: use temp dir, explicit copies, hard error on version stamp - Build wheel in a temporary directory instead of mutating the source tree - Copy only the files needed (pyproject.toml, setup.py, onnxruntime_ep_webgpu/) instead of using an exclude list - Change version placeholder to VERSION_PLACEHOLDER and fail hard if not found - Disable CPU EP fallback in test to ensure WebGPU EP runs the model - Simplify docstring and README descriptions --- plugin-ep-webgpu/python/README.md | 9 +- plugin-ep-webgpu/python/build_wheel.py | 93 +++++++++---------- plugin-ep-webgpu/python/pyproject.toml | 2 +- .../python/test/test_webgpu_plugin_ep.py | 1 + 4 files changed, 50 insertions(+), 55 deletions(-) diff --git a/plugin-ep-webgpu/python/README.md b/plugin-ep-webgpu/python/README.md index 05d60ece3041b..b486163671e01 100644 --- a/plugin-ep-webgpu/python/README.md +++ b/plugin-ep-webgpu/python/README.md @@ -31,13 +31,8 @@ python build_wheel.py \ --output_dir ./dist ``` -The script will: -1. Copy plugin binaries into the package directory -2. Stamp the version in `pyproject.toml` -3. Build the wheel -4. Run `auditwheel repair` on Linux for manylinux compliance -5. Verify the wheel was produced -6. Clean up copied binaries and restore `pyproject.toml` +The script combines the pre-built plugin EP binaries with the package source to +produce a platform-specific wheel. ## Testing diff --git a/plugin-ep-webgpu/python/build_wheel.py b/plugin-ep-webgpu/python/build_wheel.py index 69774db0b630c..45a934b5a87ea 100644 --- a/plugin-ep-webgpu/python/build_wheel.py +++ b/plugin-ep-webgpu/python/build_wheel.py @@ -1,9 +1,8 @@ #!/usr/bin/env python3 """Build a wheel for the onnxruntime-ep-webgpu package. 
-This script copies plugin EP binaries from a build directory into the package -source tree, sets the version in pyproject.toml, builds the wheel, optionally -runs auditwheel repair (Linux), verifies the output, and cleans up. +Combines pre-built plugin EP binaries with the Python package source to produce +a platform-specific wheel. Usage: python build_wheel.py --binary_dir --version --output_dir @@ -16,10 +15,10 @@ import shutil import subprocess import sys +import tempfile from pathlib import Path SCRIPT_DIR = Path(__file__).parent -PACKAGE_DIR = SCRIPT_DIR / "onnxruntime_ep_webgpu" # Patterns for binaries to include in the package BINARY_PATTERNS = [ @@ -41,12 +40,21 @@ ] -def copy_binaries(binary_dir: Path) -> list[Path]: - """Copy plugin binaries from the build directory into the package directory.""" +def prepare_staging_dir(staging_dir: Path, binary_dir: Path, version: str): + """Copy the package source tree into staging_dir, copy binaries, and stamp the version.""" + staging_dir.mkdir(parents=True, exist_ok=True) + + # Copy only the files needed to build the wheel + shutil.copy2(SCRIPT_DIR / "pyproject.toml", staging_dir / "pyproject.toml") + shutil.copy2(SCRIPT_DIR / "setup.py", staging_dir / "setup.py") + shutil.copytree(SCRIPT_DIR / "onnxruntime_ep_webgpu", staging_dir / "onnxruntime_ep_webgpu") + + # Copy plugin binaries into the package directory + package_dir = staging_dir / "onnxruntime_ep_webgpu" copied = [] for pattern in BINARY_PATTERNS: for src in binary_dir.glob(pattern): - dst = PACKAGE_DIR / src.name + dst = package_dir / src.name print(f"Copying {src} -> {dst}") shutil.copy2(src, dst) copied.append(dst) @@ -54,27 +62,25 @@ def copy_binaries(binary_dir: Path) -> list[Path]: print(f"ERROR: No plugin binaries found in {binary_dir}", file=sys.stderr) print(f"Looked for: {BINARY_PATTERNS}", file=sys.stderr) sys.exit(1) - return copied - -def set_version(version: str) -> str: - """Set the version in pyproject.toml. 
Returns the original content for restoration.""" - pyproject_path = SCRIPT_DIR / "pyproject.toml" - original = pyproject_path.read_text(encoding="utf-8") - updated = original.replace('version = "0.0.0"', f'version = "{version}"') - if updated == original: - print("WARNING: Could not find version placeholder in pyproject.toml", file=sys.stderr) + # Stamp the version in pyproject.toml + pyproject_path = staging_dir / "pyproject.toml" + content = pyproject_path.read_text(encoding="utf-8") + placeholder = 'version = "VERSION_PLACEHOLDER"' + if placeholder not in content: + print(f"ERROR: Version placeholder not found in pyproject.toml. Expected: {placeholder}", file=sys.stderr) + sys.exit(1) + updated = content.replace(placeholder, f'version = "{version}"') pyproject_path.write_text(updated, encoding="utf-8") - return original -def build_wheel(output_dir: Path): +def build_wheel(source_dir: Path, wheel_dir: Path): """Build the wheel using pip.""" - output_dir.mkdir(parents=True, exist_ok=True) + wheel_dir.mkdir(parents=True, exist_ok=True) cmd = [ sys.executable, "-m", "pip", "wheel", - str(SCRIPT_DIR), - "--wheel-dir", str(output_dir), + str(source_dir), + "--wheel-dir", str(wheel_dir), "--no-deps", "--no-build-isolation", ] @@ -82,16 +88,16 @@ def build_wheel(output_dir: Path): subprocess.check_call(cmd) -def auditwheel_repair(output_dir: Path): +def auditwheel_repair(wheel_dir: Path): """Run auditwheel repair on Linux to produce a manylinux-compliant wheel.""" if platform.system() != "Linux": return - raw_wheels = glob.glob(str(output_dir / "onnxruntime_ep_webgpu-*.whl")) + raw_wheels = glob.glob(str(wheel_dir / "onnxruntime_ep_webgpu-*.whl")) if not raw_wheels: return - repaired_dir = output_dir / "_repaired" + repaired_dir = wheel_dir / "_repaired" repaired_dir.mkdir(parents=True, exist_ok=True) for wheel in raw_wheels: @@ -106,28 +112,21 @@ def auditwheel_repair(output_dir: Path): for wheel in raw_wheels: os.remove(wheel) for repaired_wheel in 
repaired_dir.glob("*.whl"): - shutil.move(str(repaired_wheel), str(output_dir / repaired_wheel.name)) + shutil.move(str(repaired_wheel), str(wheel_dir / repaired_wheel.name)) repaired_dir.rmdir() -def verify_wheel(output_dir: Path): - """Verify that at least one wheel was produced.""" - wheels = glob.glob(str(output_dir / "onnxruntime_ep_webgpu-*.whl")) +def collect_wheels(wheel_dir: Path, output_dir: Path): + """Copy built wheels to the output directory and verify at least one was produced.""" + wheels = glob.glob(str(wheel_dir / "onnxruntime_ep_webgpu-*.whl")) if not wheels: print("ERROR: No wheel was produced", file=sys.stderr) sys.exit(1) + output_dir.mkdir(parents=True, exist_ok=True) for w in wheels: - print(f"Built wheel: {w}") - - -def cleanup(copied_files: list[Path], original_pyproject: str): - """Remove copied binaries and restore pyproject.toml.""" - for f in copied_files: - if f.exists(): - f.unlink() - print(f"Cleaned up: {f}") - pyproject_path = SCRIPT_DIR / "pyproject.toml" - pyproject_path.write_text(original_pyproject, encoding="utf-8") + dest = output_dir / Path(w).name + shutil.copy2(w, dest) + print(f"Built wheel: {dest}") def main(): @@ -144,14 +143,14 @@ def main(): print(f"ERROR: Binary directory does not exist: {args.binary_dir}", file=sys.stderr) sys.exit(1) - copied_files = copy_binaries(args.binary_dir) - original_pyproject = set_version(args.version) - try: - build_wheel(args.output_dir) - auditwheel_repair(args.output_dir) - verify_wheel(args.output_dir) - finally: - cleanup(copied_files, original_pyproject) + with tempfile.TemporaryDirectory(prefix="ort_webgpu_wheel_") as tmp: + staging_dir = Path(tmp) / "package" + wheel_dir = Path(tmp) / "wheels" + + prepare_staging_dir(staging_dir, args.binary_dir, args.version) + build_wheel(staging_dir, wheel_dir) + auditwheel_repair(wheel_dir) + collect_wheels(wheel_dir, args.output_dir) if __name__ == "__main__": diff --git a/plugin-ep-webgpu/python/pyproject.toml 
b/plugin-ep-webgpu/python/pyproject.toml index abcde60ac07f4..d1475e624d45e 100644 --- a/plugin-ep-webgpu/python/pyproject.toml +++ b/plugin-ep-webgpu/python/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "onnxruntime-ep-webgpu" -version = "0.0.0" # Replaced at build time by build_wheel.py +version = "VERSION_PLACEHOLDER" # Replaced at build time by build_wheel.py description = "ONNX Runtime WebGPU Plugin Execution Provider" readme = "onnxruntime_ep_webgpu/README.md" license = {text = "MIT"} diff --git a/plugin-ep-webgpu/python/test/test_webgpu_plugin_ep.py b/plugin-ep-webgpu/python/test/test_webgpu_plugin_ep.py index 026eb7fcc8953..e5e9814644ed3 100644 --- a/plugin-ep-webgpu/python/test/test_webgpu_plugin_ep.py +++ b/plugin-ep-webgpu/python/test/test_webgpu_plugin_ep.py @@ -79,6 +79,7 @@ def test_registration_and_inference(): # Create session with WebGPU EP sess_options = ort.SessionOptions() + sess_options.add_session_config_entry("session.disable_cpu_ep_fallback", "1") sess_options.add_provider_for_devices(webgpu_devices, {}) assert sess_options.has_providers(), "SessionOptions should have providers after add_provider_for_devices" print("OK: Session options configured with WebGPU EP") From eb27f2ee84bd4cac39fccf36583e2aedd3972f37 Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Fri, 10 Apr 2026 17:42:25 -0700 Subject: [PATCH 07/48] Update README --- plugin-ep-webgpu/python/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/plugin-ep-webgpu/python/README.md b/plugin-ep-webgpu/python/README.md index b486163671e01..1535cd76c1c30 100644 --- a/plugin-ep-webgpu/python/README.md +++ b/plugin-ep-webgpu/python/README.md @@ -4,7 +4,7 @@ This directory contains the packaging source for the `onnxruntime-ep-webgpu` Pyt ## Prerequisites -- Python 3.9+ +- Python 3.11+ - Pre-built WebGPU plugin EP binaries (from CI or a local build) Install build dependencies: @@ -26,7
+26,7 @@ Example: ```bash python build_wheel.py \ - --binary_dir /build/Release \ + --binary_dir ./build/Release \ --version 1.26.0.dev20260410 \ --output_dir ./dist ``` From 1f2daa888d8ffeb41c266f1c7642f63a04324c45 Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Fri, 10 Apr 2026 20:00:39 -0700 Subject: [PATCH 08/48] Fix Python setup in plugin EP Python package/test pipeline jobs - macOS/Windows: add setup-build-tools.yml to Python Package and Test jobs - Linux: run Python packaging and testing inside Docker for manylinux compatibility and auditwheel support - Windows: skip Python package/test jobs for arm64 (cross-compiled, can't run on x64 agents) - Linux: add gpu_machine_pool parameter for test job pool --- .../stages/plugin-linux-webgpu-stage.yml | 47 ++++-- .../stages/plugin-mac-webgpu-stage.yml | 8 + .../stages/plugin-win-webgpu-stage.yml | 153 ++++++++++-------- .../build_webgpu_plugin_python_package.sh | 38 +++++ 4 files changed, 163 insertions(+), 83 deletions(-) create mode 100644 tools/ci_build/github/linux/build_webgpu_plugin_python_package.sh diff --git a/tools/ci_build/github/azure-pipelines/stages/plugin-linux-webgpu-stage.yml b/tools/ci_build/github/azure-pipelines/stages/plugin-linux-webgpu-stage.yml index bae9df8ee86c6..b823a29ac1807 100644 --- a/tools/ci_build/github/azure-pipelines/stages/plugin-linux-webgpu-stage.yml +++ b/tools/ci_build/github/azure-pipelines/stages/plugin-linux-webgpu-stage.yml @@ -3,6 +3,10 @@ parameters: type: string default: 'onnxruntime-Ubuntu2404-AMD-CPU' +- name: gpu_machine_pool + type: string + default: 'Onnxruntime-Linux-GPU-A10' + - name: package_version type: string default: dev @@ -120,6 +124,13 @@ stages: parameters: package_version: ${{ parameters.package_version }} + - template: ../templates/get-docker-image-steps.yml + parameters: + Dockerfile: tools/ci_build/github/linux/docker/inference/x86_64/python/cuda/Dockerfile + Context: 
tools/ci_build/github/linux/docker/inference/x86_64/python/cuda + DockerBuildArgs: "--build-arg BASEIMAGE=${{ parameters.docker_base_image }} --build-arg BUILD_UID=$( id -u )" + Repository: onnxruntimewebgpuplugin + - task: DownloadPipelineArtifact@2 displayName: 'Download plugin build artifacts' inputs: @@ -128,11 +139,9 @@ stages: - script: | set -e -x - python3 -m pip install -r "$(Build.SourcesDirectory)/plugin-ep-webgpu/python/requirements-build-wheel.txt" - python3 "$(Build.SourcesDirectory)/plugin-ep-webgpu/python/build_wheel.py" \ - --binary_dir "$(Build.BinariesDirectory)/plugin_artifacts/bin" \ - --version "$(PluginPythonPackageVersion)" \ - --output_dir "$(Build.ArtifactStagingDirectory)/python" + $(Build.SourcesDirectory)/tools/ci_build/github/linux/build_webgpu_plugin_python_package.sh \ + -i onnxruntimewebgpuplugin \ + -v "$(PluginPythonPackageVersion)" displayName: 'Build Python wheel' # Python package test job @@ -142,13 +151,20 @@ stages: workspace: clean: all pool: - name: Onnxruntime-Linux-GPU-A10 + name: ${{ parameters.gpu_machine_pool }} os: linux steps: - checkout: self clean: true submodules: none + - template: ../templates/get-docker-image-steps.yml + parameters: + Dockerfile: tools/ci_build/github/linux/docker/inference/x86_64/python/cuda/Dockerfile + Context: tools/ci_build/github/linux/docker/inference/x86_64/python/cuda + DockerBuildArgs: "--build-arg BASEIMAGE=${{ parameters.docker_base_image }} --build-arg BUILD_UID=$( id -u )" + Repository: onnxruntimewebgpuplugin + - task: DownloadPipelineArtifact@2 displayName: 'Download Python wheel' inputs: @@ -157,10 +173,17 @@ stages: - script: | set -e -x - python3 -m venv "$(Build.BinariesDirectory)/test_venv" - source "$(Build.BinariesDirectory)/test_venv/bin/activate" - python3 -m pip install onnxruntime onnx numpy - wheel=$(find "$(Build.BinariesDirectory)/python_wheel" -name "onnxruntime_ep_webgpu-*.whl" | head -1) - python3 -m pip install "$wheel" - python3 
"$(Build.SourcesDirectory)/plugin-ep-webgpu/python/test/test_webgpu_plugin_ep.py" + docker run --rm --gpus all \ + --volume "$(Build.SourcesDirectory):/onnxruntime_src" \ + --volume "$(Build.BinariesDirectory):/build" \ + onnxruntimewebgpuplugin \ + /bin/bash -c " + set -e -x + python3 -m venv /build/test_venv + source /build/test_venv/bin/activate + python3 -m pip install onnxruntime onnx numpy + wheel=\$(find /build/python_wheel -name 'onnxruntime_ep_webgpu-*.whl' | head -1) + python3 -m pip install \"\$wheel\" + python3 /onnxruntime_src/plugin-ep-webgpu/python/test/test_webgpu_plugin_ep.py + " displayName: 'Install and test Python package' diff --git a/tools/ci_build/github/azure-pipelines/stages/plugin-mac-webgpu-stage.yml b/tools/ci_build/github/azure-pipelines/stages/plugin-mac-webgpu-stage.yml index 23d70d0f9c459..0184a9d377431 100644 --- a/tools/ci_build/github/azure-pipelines/stages/plugin-mac-webgpu-stage.yml +++ b/tools/ci_build/github/azure-pipelines/stages/plugin-mac-webgpu-stage.yml @@ -133,6 +133,10 @@ stages: clean: true submodules: none + - template: ../templates/setup-build-tools.yml + parameters: + host_cpu_arch: 'arm64' + - template: ../templates/set-plugin-build-variables-step.yml parameters: package_version: ${{ parameters.package_version }} @@ -168,6 +172,10 @@ stages: clean: true submodules: none + - template: ../templates/setup-build-tools.yml + parameters: + host_cpu_arch: 'arm64' + - task: DownloadPipelineArtifact@2 displayName: 'Download Python wheel' inputs: diff --git a/tools/ci_build/github/azure-pipelines/stages/plugin-win-webgpu-stage.yml b/tools/ci_build/github/azure-pipelines/stages/plugin-win-webgpu-stage.yml index 7d18b49e1a49f..788a0d22d1be4 100644 --- a/tools/ci_build/github/azure-pipelines/stages/plugin-win-webgpu-stage.yml +++ b/tools/ci_build/github/azure-pipelines/stages/plugin-win-webgpu-stage.yml @@ -256,78 +256,89 @@ stages: versionOption: custom versionPublish: '$(PluginUniversalPackageVersion)' - # Python package 
build job - - job: Win_plugin_webgpu_${{ parameters.arch }}_Python_Package - dependsOn: Win_plugin_webgpu_${{ parameters.arch }}_Build - timeoutInMinutes: 30 - workspace: - clean: all - pool: - name: onnxruntime-Win-CPU-VS2022-Latest - os: windows - templateContext: - outputs: - - output: pipelineArtifact - targetPath: '$(Build.ArtifactStagingDirectory)\python' - artifactName: webgpu_plugin_python_win_${{ parameters.arch }} - variables: - - template: ../templates/common-variables.yml - steps: - - checkout: self - clean: true - submodules: none - - - template: ../templates/set-plugin-build-variables-step.yml - parameters: - package_version: ${{ parameters.package_version }} + # Python package jobs (x64 only — arm64 cross-compiled binaries can't be + # packaged or tested on x64 agents) + - ${{ if eq(parameters.arch, 'x64') }}: + # Python package build job + - job: Win_plugin_webgpu_${{ parameters.arch }}_Python_Package + dependsOn: Win_plugin_webgpu_${{ parameters.arch }}_Build + timeoutInMinutes: 30 + workspace: + clean: all + pool: + name: onnxruntime-Win-CPU-VS2022-Latest + os: windows + templateContext: + outputs: + - output: pipelineArtifact + targetPath: '$(Build.ArtifactStagingDirectory)\python' + artifactName: webgpu_plugin_python_win_${{ parameters.arch }} + variables: + - template: ../templates/common-variables.yml + steps: + - checkout: self + clean: true + submodules: none + + - template: ../templates/setup-build-tools.yml + parameters: + host_cpu_arch: 'x64' + + - template: ../templates/set-plugin-build-variables-step.yml + parameters: + package_version: ${{ parameters.package_version }} - - task: DownloadPipelineArtifact@2 - displayName: 'Download plugin build artifacts' - inputs: - artifactName: webgpu_plugin_win_${{ parameters.arch }} - targetPath: '$(Build.BinariesDirectory)\plugin_artifacts' + - task: DownloadPipelineArtifact@2 + displayName: 'Download plugin build artifacts' + inputs: + artifactName: webgpu_plugin_win_${{ parameters.arch }} + 
targetPath: '$(Build.BinariesDirectory)\plugin_artifacts' - - task: PowerShell@2 - displayName: 'Build Python wheel' - inputs: - targetType: inline - pwsh: true - script: | - python -m pip install -r "$(Build.SourcesDirectory)\plugin-ep-webgpu\python\requirements-build-wheel.txt" - python "$(Build.SourcesDirectory)\plugin-ep-webgpu\python\build_wheel.py" ` - --binary_dir "$(Build.BinariesDirectory)\plugin_artifacts\bin" ` - --version "$(PluginPythonPackageVersion)" ` - --output_dir "$(Build.ArtifactStagingDirectory)\python" - - # Python package test job - - job: Win_plugin_webgpu_${{ parameters.arch }}_Python_Test - dependsOn: Win_plugin_webgpu_${{ parameters.arch }}_Python_Package - timeoutInMinutes: 30 - workspace: - clean: all - pool: - name: onnxruntime-Win2022-VS2022-webgpu-A10 - os: windows - steps: - - checkout: self - clean: true - submodules: none + - task: PowerShell@2 + displayName: 'Build Python wheel' + inputs: + targetType: inline + pwsh: true + script: | + python -m pip install -r "$(Build.SourcesDirectory)\plugin-ep-webgpu\python\requirements-build-wheel.txt" + python "$(Build.SourcesDirectory)\plugin-ep-webgpu\python\build_wheel.py" ` + --binary_dir "$(Build.BinariesDirectory)\plugin_artifacts\bin" ` + --version "$(PluginPythonPackageVersion)" ` + --output_dir "$(Build.ArtifactStagingDirectory)\python" + + # Python package test job + - job: Win_plugin_webgpu_${{ parameters.arch }}_Python_Test + dependsOn: Win_plugin_webgpu_${{ parameters.arch }}_Python_Package + timeoutInMinutes: 30 + workspace: + clean: all + pool: + name: onnxruntime-Win2022-VS2022-webgpu-A10 + os: windows + steps: + - checkout: self + clean: true + submodules: none + + - template: ../templates/setup-build-tools.yml + parameters: + host_cpu_arch: 'x64' - - task: DownloadPipelineArtifact@2 - displayName: 'Download Python wheel' - inputs: - artifactName: webgpu_plugin_python_win_${{ parameters.arch }} - targetPath: '$(Build.BinariesDirectory)\python_wheel' + - task: 
DownloadPipelineArtifact@2 + displayName: 'Download Python wheel' + inputs: + artifactName: webgpu_plugin_python_win_${{ parameters.arch }} + targetPath: '$(Build.BinariesDirectory)\python_wheel' - - task: PowerShell@2 - displayName: 'Install and test Python package' - inputs: - targetType: inline - pwsh: true - script: | - python -m venv "$(Build.BinariesDirectory)\test_venv" - & "$(Build.BinariesDirectory)\test_venv\Scripts\Activate.ps1" - python -m pip install onnxruntime onnx numpy - $wheel = (Get-ChildItem "$(Build.BinariesDirectory)\python_wheel\onnxruntime_ep_webgpu-*.whl")[0] - python -m pip install $wheel.FullName - python "$(Build.SourcesDirectory)\plugin-ep-webgpu\python\test\test_webgpu_plugin_ep.py" + - task: PowerShell@2 + displayName: 'Install and test Python package' + inputs: + targetType: inline + pwsh: true + script: | + python -m venv "$(Build.BinariesDirectory)\test_venv" + & "$(Build.BinariesDirectory)\test_venv\Scripts\Activate.ps1" + python -m pip install onnxruntime onnx numpy + $wheel = (Get-ChildItem "$(Build.BinariesDirectory)\python_wheel\onnxruntime_ep_webgpu-*.whl")[0] + python -m pip install $wheel.FullName + python "$(Build.SourcesDirectory)\plugin-ep-webgpu\python\test\test_webgpu_plugin_ep.py" diff --git a/tools/ci_build/github/linux/build_webgpu_plugin_python_package.sh b/tools/ci_build/github/linux/build_webgpu_plugin_python_package.sh new file mode 100644 index 0000000000000..e9e905f87dbe5 --- /dev/null +++ b/tools/ci_build/github/linux/build_webgpu_plugin_python_package.sh @@ -0,0 +1,38 @@ +#!/bin/bash +set -e -x + +# Build the onnxruntime-ep-webgpu Python wheel inside Docker. +# The Docker container provides a manylinux-compatible environment +# with the correct Python version and auditwheel support. 
+ +DOCKER_IMAGE="onnxruntimewebgpuplugin" +VERSION="" + +while getopts "i:v:" parameter_Option +do case "${parameter_Option}" +in +i) DOCKER_IMAGE=${OPTARG};; +v) VERSION=${OPTARG};; +*) echo "Usage: $0 -i -v " + exit 1;; +esac +done + +if [ -z "$VERSION" ]; then + echo "ERROR: Version is required. Use -v " + exit 1 +fi + +docker run --rm \ + --volume "${BUILD_SOURCESDIRECTORY}:/onnxruntime_src" \ + --volume "${BUILD_BINARIESDIRECTORY}:/build" \ + --volume "${BUILD_ARTIFACTSTAGINGDIRECTORY}:/staging" \ + "$DOCKER_IMAGE" \ + /bin/bash -c " + set -e -x + python3 -m pip install -r /onnxruntime_src/plugin-ep-webgpu/python/requirements-build-wheel.txt + python3 /onnxruntime_src/plugin-ep-webgpu/python/build_wheel.py \ + --binary_dir /build/plugin_artifacts/bin \ + --version ${VERSION} \ + --output_dir /staging/python + " From 742be74b8c6f21b68f5fa4bdd371512c1ad72c5a Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Mon, 13 Apr 2026 14:32:45 -0700 Subject: [PATCH 09/48] make tools/ci_build/github/linux/build_webgpu_plugin_python_package.sh executable --- tools/ci_build/github/linux/build_webgpu_plugin_python_package.sh | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100644 => 100755 tools/ci_build/github/linux/build_webgpu_plugin_python_package.sh diff --git a/tools/ci_build/github/linux/build_webgpu_plugin_python_package.sh b/tools/ci_build/github/linux/build_webgpu_plugin_python_package.sh old mode 100644 new mode 100755 From 5c25305d72b9fa9e35c4c84f29d2e2c7882caf9c Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Mon, 13 Apr 2026 14:43:58 -0700 Subject: [PATCH 10/48] Add debug logging to WebGPU plugin EP test script Print environment info (Python version, platform, ORT version, relevant env vars), package directory contents, library file size, device enumeration details, session providers, and full tracebacks on failure. 
--- .../python/test/test_webgpu_plugin_ep.py | 31 +++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/plugin-ep-webgpu/python/test/test_webgpu_plugin_ep.py b/plugin-ep-webgpu/python/test/test_webgpu_plugin_ep.py index e5e9814644ed3..382c4ddc880e5 100644 --- a/plugin-ep-webgpu/python/test/test_webgpu_plugin_ep.py +++ b/plugin-ep-webgpu/python/test/test_webgpu_plugin_ep.py @@ -11,8 +11,11 @@ (e.g., on CPU-only build agents). """ +import os +import platform import sys import tempfile +import traceback from pathlib import Path import numpy as np @@ -38,10 +41,29 @@ def create_mul_model() -> str: return str(model_path) +def print_environment_info(): + """Print diagnostic information about the runtime environment.""" + print(f" Python: {sys.version}") + print(f" Platform: {platform.platform()}") + print(f" Architecture: {platform.machine()}") + print(f" ONNX Runtime version: {ort.__version__}") + print(f" ONNX Runtime location: {ort.__file__}") + print(f" Available providers (built-in): {ort.get_available_providers()}") + # Print relevant environment variables + for var in sorted(os.environ): + lower = var.lower() + if any(kw in lower for kw in ["onnx", "ort", "gpu", "cuda", "vulkan", "webgpu", "dawn", "path", "ld_library"]): + print(f" ENV {var}={os.environ[var]}") + + def test_import_and_library_path(): """Test that the package imports and the library path is valid.""" import onnxruntime_ep_webgpu as webgpu_ep + print(f" Package location: {webgpu_ep.__file__}") + pkg_dir = Path(webgpu_ep.__file__).parent + print(f" Package directory contents: {sorted(p.name for p in pkg_dir.iterdir())}") + lib_path = webgpu_ep.get_library_path() assert Path(lib_path).is_file(), f"Library path does not exist: {lib_path}" print(f"OK: Library path: {lib_path}") @@ -64,12 +86,15 @@ def test_registration_and_inference(): registration_name = "webgpu_plugin_test" # Register the plugin EP + print(f" Registering library: {lib_path}") + print(f" Library file size: 
{Path(lib_path).stat().st_size} bytes") ort.register_execution_provider_library(registration_name, lib_path) print(f"OK: Registered EP library as '{registration_name}'") try: # Discover devices all_devices = ort.get_ep_devices() + print(f" All devices: {[(d.ep_name, getattr(d, 'device_id', 'N/A')) for d in all_devices]}") webgpu_devices = [d for d in all_devices if d.ep_name == ep_name] print(f"Found {len(webgpu_devices)} WebGPU device(s)") @@ -85,7 +110,9 @@ def test_registration_and_inference(): print("OK: Session options configured with WebGPU EP") model_path = create_mul_model() + print(f" Model path: {model_path}") sess = ort.InferenceSession(model_path, sess_options=sess_options) + print(f" Session providers: {sess.get_providers()}") print("OK: InferenceSession created") # Run inference @@ -110,6 +137,9 @@ def test_registration_and_inference(): def main(): print("=== WebGPU Plugin EP Python Package Test ===") + print("\n--- Environment ---") + print_environment_info() + print("\n--- Test 1: Import and library path ---") test_import_and_library_path() @@ -124,4 +154,5 @@ def main(): main() except Exception as e: print(f"\nFAILED: {e}", file=sys.stderr) + traceback.print_exc() sys.exit(1) From 312f277e30c46e02cbb981d3af058df53d1754a1 Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Mon, 13 Apr 2026 14:44:13 -0700 Subject: [PATCH 11/48] Add ESRP codesign step for macOS WebGPU plugin binary Apple Silicon requires all executable code to be signed. Without this, dlopen triggers a SIGBUS (bus error) when loading the unsigned dylib. 
--- .../azure-pipelines/stages/plugin-mac-webgpu-stage.yml | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tools/ci_build/github/azure-pipelines/stages/plugin-mac-webgpu-stage.yml b/tools/ci_build/github/azure-pipelines/stages/plugin-mac-webgpu-stage.yml index 0184a9d377431..25d26063805e2 100644 --- a/tools/ci_build/github/azure-pipelines/stages/plugin-mac-webgpu-stage.yml +++ b/tools/ci_build/github/azure-pipelines/stages/plugin-mac-webgpu-stage.yml @@ -88,6 +88,16 @@ stages: libonnxruntime_providers_webgpu.dylib TargetFolder: '$(Build.ArtifactStagingDirectory)/bin' + - template: ../templates/mac-esrp-dylib.yml + parameters: + FolderPath: '$(Build.ArtifactStagingDirectory)/bin' + Pattern: '*.dylib' + + - script: | + set -e -x + codesign --display --verbose=3 "$(Build.ArtifactStagingDirectory)/bin/libonnxruntime_providers_webgpu.dylib" + displayName: 'Verify signed binary' + - script: | set -e -x mkdir -p "$(Build.ArtifactStagingDirectory)/version" From 79f72c5615c6985feb46c79bb6d3301bc52b29c7 Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Mon, 13 Apr 2026 15:30:28 -0700 Subject: [PATCH 12/48] Fix ESRP codesign: zip dylib before signing ESRP requires .zip or .dmg input. Zip the dylib before signing, then unzip the signed result and verify. 
--- .../stages/plugin-mac-webgpu-stage.yml | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/tools/ci_build/github/azure-pipelines/stages/plugin-mac-webgpu-stage.yml b/tools/ci_build/github/azure-pipelines/stages/plugin-mac-webgpu-stage.yml index 25d26063805e2..0d72ba11a6ef0 100644 --- a/tools/ci_build/github/azure-pipelines/stages/plugin-mac-webgpu-stage.yml +++ b/tools/ci_build/github/azure-pipelines/stages/plugin-mac-webgpu-stage.yml @@ -88,15 +88,24 @@ stages: libonnxruntime_providers_webgpu.dylib TargetFolder: '$(Build.ArtifactStagingDirectory)/bin' + - script: | + set -e -x + zip webgpu_plugin.zip libonnxruntime_providers_webgpu.dylib + displayName: 'Zip plugin binary for signing' + workingDirectory: '$(Build.ArtifactStagingDirectory)/bin' + - template: ../templates/mac-esrp-dylib.yml parameters: FolderPath: '$(Build.ArtifactStagingDirectory)/bin' - Pattern: '*.dylib' + Pattern: '*.zip' - script: | set -e -x - codesign --display --verbose=3 "$(Build.ArtifactStagingDirectory)/bin/libonnxruntime_providers_webgpu.dylib" - displayName: 'Verify signed binary' + unzip --overwrite webgpu_plugin.zip + rm -- webgpu_plugin.zip + codesign --display --verbose=3 libonnxruntime_providers_webgpu.dylib + displayName: 'Unzip and verify signed binary' + workingDirectory: '$(Build.ArtifactStagingDirectory)/bin' - script: | set -e -x From ffdd4caccb0937fea76f48d5c48d2f462eac54cb Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Mon, 13 Apr 2026 15:34:32 -0700 Subject: [PATCH 13/48] update zip file name, use as pattern --- .../azure-pipelines/stages/plugin-mac-webgpu-stage.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/ci_build/github/azure-pipelines/stages/plugin-mac-webgpu-stage.yml b/tools/ci_build/github/azure-pipelines/stages/plugin-mac-webgpu-stage.yml index 0d72ba11a6ef0..ced5dd14f632f 100644 --- 
a/tools/ci_build/github/azure-pipelines/stages/plugin-mac-webgpu-stage.yml +++ b/tools/ci_build/github/azure-pipelines/stages/plugin-mac-webgpu-stage.yml @@ -90,19 +90,19 @@ stages: - script: | set -e -x - zip webgpu_plugin.zip libonnxruntime_providers_webgpu.dylib + zip webgpu_plugin_ep_binaries.zip libonnxruntime_providers_webgpu.dylib displayName: 'Zip plugin binary for signing' workingDirectory: '$(Build.ArtifactStagingDirectory)/bin' - template: ../templates/mac-esrp-dylib.yml parameters: FolderPath: '$(Build.ArtifactStagingDirectory)/bin' - Pattern: '*.zip' + Pattern: 'webgpu_plugin_ep_binaries.zip' - script: | set -e -x - unzip --overwrite webgpu_plugin.zip - rm -- webgpu_plugin.zip + unzip --overwrite webgpu_plugin_ep_binaries.zip + rm -- webgpu_plugin_ep_binaries.zip codesign --display --verbose=3 libonnxruntime_providers_webgpu.dylib displayName: 'Unzip and verify signed binary' workingDirectory: '$(Build.ArtifactStagingDirectory)/bin' From 6b10b94bc47ee49cf961c41e1f24986e06334a47 Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Mon, 13 Apr 2026 15:47:17 -0700 Subject: [PATCH 14/48] Bootstrap pip with ensurepip in Linux Docker packaging script The Docker image does not have pip pre-installed. Use ensurepip to bootstrap it before installing wheel build dependencies. 
--- .../ci_build/github/linux/build_webgpu_plugin_python_package.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/ci_build/github/linux/build_webgpu_plugin_python_package.sh b/tools/ci_build/github/linux/build_webgpu_plugin_python_package.sh index e9e905f87dbe5..fab2df9603417 100755 --- a/tools/ci_build/github/linux/build_webgpu_plugin_python_package.sh +++ b/tools/ci_build/github/linux/build_webgpu_plugin_python_package.sh @@ -30,6 +30,7 @@ docker run --rm \ "$DOCKER_IMAGE" \ /bin/bash -c " set -e -x + python3 -m ensurepip python3 -m pip install -r /onnxruntime_src/plugin-ep-webgpu/python/requirements-build-wheel.txt python3 /onnxruntime_src/plugin-ep-webgpu/python/build_wheel.py \ --binary_dir /build/plugin_artifacts/bin \ From 58a7311bdb1dad7a3cb5b85a6c6dd13af395fce4 Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Mon, 13 Apr 2026 16:35:14 -0700 Subject: [PATCH 15/48] add more prints for Windows test step --- .../stages/plugin-win-webgpu-stage.yml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/tools/ci_build/github/azure-pipelines/stages/plugin-win-webgpu-stage.yml b/tools/ci_build/github/azure-pipelines/stages/plugin-win-webgpu-stage.yml index 788a0d22d1be4..be0bb45622cb6 100644 --- a/tools/ci_build/github/azure-pipelines/stages/plugin-win-webgpu-stage.yml +++ b/tools/ci_build/github/azure-pipelines/stages/plugin-win-webgpu-stage.yml @@ -336,9 +336,23 @@ stages: targetType: inline pwsh: true script: | + $ErrorActionPreference = 'Stop' + + echo "creating test_venv" python -m venv "$(Build.BinariesDirectory)\test_venv" + + echo "activating test_venv" & "$(Build.BinariesDirectory)\test_venv\Scripts\Activate.ps1" + + echo "installing onnxruntime onnx numpy" python -m pip install onnxruntime onnx numpy + if ($LASTEXITCODE -ne 0) { throw "pip install onnxruntime onnx numpy failed with exit code $LASTEXITCODE" } + $wheel = (Get-ChildItem 
"$(Build.BinariesDirectory)\python_wheel\onnxruntime_ep_webgpu-*.whl")[0] + echo "installing ${wheel}" python -m pip install $wheel.FullName + if ($LASTEXITCODE -ne 0) { throw "pip install wheel failed with exit code $LASTEXITCODE" } + + echo "running test_webgpu_plugin_ep.py" python "$(Build.SourcesDirectory)\plugin-ep-webgpu\python\test\test_webgpu_plugin_ep.py" + if ($LASTEXITCODE -ne 0) { throw "test_webgpu_plugin_ep.py failed with exit code $LASTEXITCODE (0x$($LASTEXITCODE.ToString('X')))" } From 2610f1d32c6556b2e824853ad291abca0db09604 Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Tue, 14 Apr 2026 09:33:00 -0700 Subject: [PATCH 16/48] use -o for unzip --- .../github/azure-pipelines/stages/plugin-mac-webgpu-stage.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/ci_build/github/azure-pipelines/stages/plugin-mac-webgpu-stage.yml b/tools/ci_build/github/azure-pipelines/stages/plugin-mac-webgpu-stage.yml index ced5dd14f632f..8c3cce3dd589a 100644 --- a/tools/ci_build/github/azure-pipelines/stages/plugin-mac-webgpu-stage.yml +++ b/tools/ci_build/github/azure-pipelines/stages/plugin-mac-webgpu-stage.yml @@ -101,7 +101,7 @@ stages: - script: | set -e -x - unzip --overwrite webgpu_plugin_ep_binaries.zip + unzip -o webgpu_plugin_ep_binaries.zip rm -- webgpu_plugin_ep_binaries.zip codesign --display --verbose=3 libonnxruntime_providers_webgpu.dylib displayName: 'Unzip and verify signed binary' From f87b27c0210b45482f14a09c1779741e70b49724 Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Tue, 14 Apr 2026 10:25:50 -0700 Subject: [PATCH 17/48] Add unbuffered output and verbose ORT logging to plugin EP tests Use python -u in all three platform pipelines so prints are flushed immediately, even if the process crashes during native DLL load. Add ort.set_default_logger_severity(0) in the test script for verbose ORT logging. 
--- plugin-ep-webgpu/python/test/test_webgpu_plugin_ep.py | 3 +++ .../azure-pipelines/stages/plugin-linux-webgpu-stage.yml | 2 +- .../github/azure-pipelines/stages/plugin-mac-webgpu-stage.yml | 2 +- .../github/azure-pipelines/stages/plugin-win-webgpu-stage.yml | 2 +- 4 files changed, 6 insertions(+), 3 deletions(-) diff --git a/plugin-ep-webgpu/python/test/test_webgpu_plugin_ep.py b/plugin-ep-webgpu/python/test/test_webgpu_plugin_ep.py index 382c4ddc880e5..5af66a655be8d 100644 --- a/plugin-ep-webgpu/python/test/test_webgpu_plugin_ep.py +++ b/plugin-ep-webgpu/python/test/test_webgpu_plugin_ep.py @@ -137,6 +137,9 @@ def test_registration_and_inference(): def main(): print("=== WebGPU Plugin EP Python Package Test ===") + # Set verbose logging so ORT internals are visible in CI logs + ort.set_default_logger_severity(0) + print("\n--- Environment ---") print_environment_info() diff --git a/tools/ci_build/github/azure-pipelines/stages/plugin-linux-webgpu-stage.yml b/tools/ci_build/github/azure-pipelines/stages/plugin-linux-webgpu-stage.yml index b823a29ac1807..802b47dd1e76b 100644 --- a/tools/ci_build/github/azure-pipelines/stages/plugin-linux-webgpu-stage.yml +++ b/tools/ci_build/github/azure-pipelines/stages/plugin-linux-webgpu-stage.yml @@ -184,6 +184,6 @@ stages: python3 -m pip install onnxruntime onnx numpy wheel=\$(find /build/python_wheel -name 'onnxruntime_ep_webgpu-*.whl' | head -1) python3 -m pip install \"\$wheel\" - python3 /onnxruntime_src/plugin-ep-webgpu/python/test/test_webgpu_plugin_ep.py + python3 -u /onnxruntime_src/plugin-ep-webgpu/python/test/test_webgpu_plugin_ep.py " displayName: 'Install and test Python package' diff --git a/tools/ci_build/github/azure-pipelines/stages/plugin-mac-webgpu-stage.yml b/tools/ci_build/github/azure-pipelines/stages/plugin-mac-webgpu-stage.yml index 8c3cce3dd589a..f31c6bcf37fd6 100644 --- a/tools/ci_build/github/azure-pipelines/stages/plugin-mac-webgpu-stage.yml +++ 
b/tools/ci_build/github/azure-pipelines/stages/plugin-mac-webgpu-stage.yml @@ -208,5 +208,5 @@ stages: python3 -m pip install onnxruntime onnx numpy wheel=$(find "$(Build.BinariesDirectory)/python_wheel" -name "onnxruntime_ep_webgpu-*.whl" | head -1) python3 -m pip install "$wheel" - python3 "$(Build.SourcesDirectory)/plugin-ep-webgpu/python/test/test_webgpu_plugin_ep.py" + python3 -u "$(Build.SourcesDirectory)/plugin-ep-webgpu/python/test/test_webgpu_plugin_ep.py" displayName: 'Install and test Python package' diff --git a/tools/ci_build/github/azure-pipelines/stages/plugin-win-webgpu-stage.yml b/tools/ci_build/github/azure-pipelines/stages/plugin-win-webgpu-stage.yml index be0bb45622cb6..d8d2afa12617f 100644 --- a/tools/ci_build/github/azure-pipelines/stages/plugin-win-webgpu-stage.yml +++ b/tools/ci_build/github/azure-pipelines/stages/plugin-win-webgpu-stage.yml @@ -354,5 +354,5 @@ stages: if ($LASTEXITCODE -ne 0) { throw "pip install wheel failed with exit code $LASTEXITCODE" } echo "running test_webgpu_plugin_ep.py" - python "$(Build.SourcesDirectory)\plugin-ep-webgpu\python\test\test_webgpu_plugin_ep.py" + python -u "$(Build.SourcesDirectory)\plugin-ep-webgpu\python\test\test_webgpu_plugin_ep.py" if ($LASTEXITCODE -ne 0) { throw "test_webgpu_plugin_ep.py failed with exit code $LASTEXITCODE (0x$($LASTEXITCODE.ToString('X')))" } From 09f6b23f4bb4c1188c6e83e236d05c926c94e013 Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Wed, 15 Apr 2026 10:28:14 -0700 Subject: [PATCH 18/48] Replace unsafe reinterpret_cast with C API calls in op_kernel_info.h Remove reinterpret_cast of OrtKernelInfo* to internal OpKernelInfo* that breaks ABI across DLL boundaries (vtable mismatch between plugin EP and ORT core). 
- KernelInfoCache: use Ort::ConstKernelInfo::GetEp() instead of casting to OpKernelInfo* and calling GetExecutionProvider()->GetOrtEp() - GetAllocator: use C API KernelInfoGetAllocator + IAllocatorImplWrappingOrtAllocator instead of casting to OpKernelInfo* - Remove #include core/framework/op_kernel_info.h (no longer needed) - Add #include core/session/allocator_adapters.h for IAllocatorImplWrappingOrtAllocator --- .../onnxruntime/ep/adapter/op_kernel_info.h | 20 +++++++++---------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/include/onnxruntime/ep/adapter/op_kernel_info.h b/include/onnxruntime/ep/adapter/op_kernel_info.h index 7e61385f3686c..84beb0993ce02 100644 --- a/include/onnxruntime/ep/adapter/op_kernel_info.h +++ b/include/onnxruntime/ep/adapter/op_kernel_info.h @@ -12,9 +12,9 @@ #include "core/common/narrow.h" #include "core/common/status.h" #include "core/framework/config_options.h" -#include "core/framework/op_kernel_info.h" #include "core/framework/tensor_shape.h" #include "core/framework/tensor.h" +#include "core/session/allocator_adapters.h" #include "node.h" #include "kernel_def.h" @@ -43,12 +43,10 @@ struct OpKernelInfo { // to manage the lifetime of the cached data. struct KernelInfoCache { explicit KernelInfoCache(const OrtKernelInfo* kernel_info) : kernel_info_(kernel_info) { - const auto* core_kernel_info = reinterpret_cast(kernel_info); - execution_provider_ = core_kernel_info->GetExecutionProvider(); - ort_ep_ = execution_provider_ != nullptr ? execution_provider_->GetOrtEp() : nullptr; - ep_impl_ = ort_ep_ != nullptr ? (static_cast(ort_ep_))->EpImpl() : execution_provider_; - Ort::ConstKernelInfo info{kernel_info}; + ort_ep_ = info.GetEp(); + ep_impl_ = ort_ep_ != nullptr ? 
(static_cast(ort_ep_))->EpImpl() : nullptr; + const size_t input_count = info.GetInputCount(); constant_input_tensors.resize(input_count); for (size_t i = 0; i < input_count; ++i) { @@ -60,7 +58,6 @@ struct OpKernelInfo { } } const OrtKernelInfo* kernel_info_; - const ::onnxruntime::IExecutionProvider* execution_provider_{}; const OrtEp* ort_ep_{}; const ::onnxruntime::IExecutionProvider* ep_impl_{}; std::vector constant_input_tensors; @@ -74,11 +71,12 @@ struct OpKernelInfo { return (static_cast(cache_->ort_ep_))->GetDataTransferManager(); } - // Delegates to the core OpKernelInfo::GetAllocator so the adapter returns - // exactly the same allocator the framework would provide for each OrtMemType. + // Retrieves the allocator for a specific memory type via the C API, avoiding + // unsafe casts to internal types that would break ABI across DLL boundaries. AllocatorPtr GetAllocator(OrtMemType mem_type) const { - const auto* core_kernel_info = reinterpret_cast(cache_->kernel_info_); - return core_kernel_info->GetAllocator(mem_type); + OrtAllocator* ort_allocator = nullptr; + Ort::ThrowOnError(Ort::GetApi().KernelInfoGetAllocator(cache_->kernel_info_, mem_type, &ort_allocator)); + return std::make_shared(ort_allocator); } Node node() const noexcept { From 9543f5d9b48ca13bf6f0b9fb92093ffd11ec470b Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Wed, 15 Apr 2026 11:01:40 -0700 Subject: [PATCH 19/48] remove comment that seems unnecessary --- include/onnxruntime/ep/adapter/op_kernel_info.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/onnxruntime/ep/adapter/op_kernel_info.h b/include/onnxruntime/ep/adapter/op_kernel_info.h index 84beb0993ce02..b2ebb37b617ff 100644 --- a/include/onnxruntime/ep/adapter/op_kernel_info.h +++ b/include/onnxruntime/ep/adapter/op_kernel_info.h @@ -71,8 +71,6 @@ struct OpKernelInfo { return (static_cast(cache_->ort_ep_))->GetDataTransferManager(); } - // Retrieves the allocator for a specific 
memory type via the C API, avoiding - // unsafe casts to internal types that would break ABI across DLL boundaries. AllocatorPtr GetAllocator(OrtMemType mem_type) const { OrtAllocator* ort_allocator = nullptr; Ort::ThrowOnError(Ort::GetApi().KernelInfoGetAllocator(cache_->kernel_info_, mem_type, &ort_allocator)); From 4d5557d78067ac0eb46456e938d4e8be1f7525c2 Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Wed, 15 Apr 2026 11:08:58 -0700 Subject: [PATCH 20/48] update build_wheel to use more Path functionality --- plugin-ep-webgpu/python/build_wheel.py | 40 ++++++++++++-------------- 1 file changed, 18 insertions(+), 22 deletions(-) diff --git a/plugin-ep-webgpu/python/build_wheel.py b/plugin-ep-webgpu/python/build_wheel.py index 45a934b5a87ea..531b671065003 100644 --- a/plugin-ep-webgpu/python/build_wheel.py +++ b/plugin-ep-webgpu/python/build_wheel.py @@ -9,8 +9,6 @@ """ import argparse -import glob -import os import platform import shutil import subprocess @@ -93,39 +91,37 @@ def auditwheel_repair(wheel_dir: Path): if platform.system() != "Linux": return - raw_wheels = glob.glob(str(wheel_dir / "onnxruntime_ep_webgpu-*.whl")) + raw_wheels = wheel_dir.glob("onnxruntime_ep_webgpu-*.whl") if not raw_wheels: return - repaired_dir = wheel_dir / "_repaired" - repaired_dir.mkdir(parents=True, exist_ok=True) + with tempfile.TemporaryDirectory() as repaired_dir_name: + repaired_dir = Path(repaired_dir_name) - for wheel in raw_wheels: - cmd = [sys.executable, "-m", "auditwheel", "repair", wheel, - "--wheel-dir", str(repaired_dir)] - for lib in AUDITWHEEL_EXCLUDE: - cmd.extend(["--exclude", lib]) - print(f"Running: {' '.join(cmd)}") - subprocess.check_call(cmd) + for wheel in raw_wheels: + cmd = [sys.executable, "-m", "auditwheel", "repair", str(wheel), "--wheel-dir", str(repaired_dir)] + for lib in AUDITWHEEL_EXCLUDE: + cmd.extend(["--exclude", lib]) + print(f"Running: {' '.join(cmd)}") + subprocess.check_call(cmd) - # Replace raw 
wheels with repaired ones - for wheel in raw_wheels: - os.remove(wheel) - for repaired_wheel in repaired_dir.glob("*.whl"): - shutil.move(str(repaired_wheel), str(wheel_dir / repaired_wheel.name)) - repaired_dir.rmdir() + # Replace raw wheels with repaired ones + for repaired_wheel in repaired_dir.glob("*.whl"): + repaired_wheel.replace(wheel_dir / repaired_wheel.name) def collect_wheels(wheel_dir: Path, output_dir: Path): """Copy built wheels to the output directory and verify at least one was produced.""" - wheels = glob.glob(str(wheel_dir / "onnxruntime_ep_webgpu-*.whl")) + wheels = wheel_dir.glob("onnxruntime_ep_webgpu-*.whl") if not wheels: print("ERROR: No wheel was produced", file=sys.stderr) sys.exit(1) + output_dir.mkdir(parents=True, exist_ok=True) - for w in wheels: - dest = output_dir / Path(w).name - shutil.copy2(w, dest) + + for wheel in wheels: + dest = output_dir / wheel.name + shutil.copy2(wheel, dest) print(f"Built wheel: {dest}") From 080704ef5ef120fc1c8a9084e116f1779ac90473 Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Wed, 15 Apr 2026 16:04:25 -0700 Subject: [PATCH 21/48] Fix auditwheel producing duplicate wheels Materialize the glob generator to a list so the emptiness check works, and delete each raw wheel after auditwheel repair so only the manylinux wheel remains. 
--- plugin-ep-webgpu/python/build_wheel.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/plugin-ep-webgpu/python/build_wheel.py b/plugin-ep-webgpu/python/build_wheel.py index 531b671065003..27d55b8c95e2d 100644 --- a/plugin-ep-webgpu/python/build_wheel.py +++ b/plugin-ep-webgpu/python/build_wheel.py @@ -95,17 +95,23 @@ def auditwheel_repair(wheel_dir: Path): if not raw_wheels: return + raw_wheel_list = list(raw_wheels) + if not raw_wheel_list: + return + with tempfile.TemporaryDirectory() as repaired_dir_name: repaired_dir = Path(repaired_dir_name) - for wheel in raw_wheels: + for wheel in raw_wheel_list: cmd = [sys.executable, "-m", "auditwheel", "repair", str(wheel), "--wheel-dir", str(repaired_dir)] for lib in AUDITWHEEL_EXCLUDE: cmd.extend(["--exclude", lib]) print(f"Running: {' '.join(cmd)}") subprocess.check_call(cmd) + # Remove the raw wheel so only the repaired one remains + wheel.unlink() - # Replace raw wheels with repaired ones + # Move repaired wheels into wheel_dir for repaired_wheel in repaired_dir.glob("*.whl"): repaired_wheel.replace(wheel_dir / repaired_wheel.name) From 3f08e3e7d144ac9af5125a307fdb070161f722d7 Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Wed, 15 Apr 2026 18:51:47 -0700 Subject: [PATCH 22/48] Add set-nightly-build-option-variable-step to Python_Package jobs Add the missing set-nightly-build-option-variable-step.yml template to all three platform Python_Package jobs for consistency with the Build jobs. 
--- .../github/azure-pipelines/stages/plugin-linux-webgpu-stage.yml | 2 ++ .../github/azure-pipelines/stages/plugin-mac-webgpu-stage.yml | 2 ++ .../github/azure-pipelines/stages/plugin-win-webgpu-stage.yml | 2 ++ 3 files changed, 6 insertions(+) diff --git a/tools/ci_build/github/azure-pipelines/stages/plugin-linux-webgpu-stage.yml b/tools/ci_build/github/azure-pipelines/stages/plugin-linux-webgpu-stage.yml index 802b47dd1e76b..d9c9932ed286f 100644 --- a/tools/ci_build/github/azure-pipelines/stages/plugin-linux-webgpu-stage.yml +++ b/tools/ci_build/github/azure-pipelines/stages/plugin-linux-webgpu-stage.yml @@ -120,6 +120,8 @@ stages: clean: true submodules: none + - template: ../templates/set-nightly-build-option-variable-step.yml + - template: ../templates/set-plugin-build-variables-step.yml parameters: package_version: ${{ parameters.package_version }} diff --git a/tools/ci_build/github/azure-pipelines/stages/plugin-mac-webgpu-stage.yml b/tools/ci_build/github/azure-pipelines/stages/plugin-mac-webgpu-stage.yml index f31c6bcf37fd6..4c7d855456e58 100644 --- a/tools/ci_build/github/azure-pipelines/stages/plugin-mac-webgpu-stage.yml +++ b/tools/ci_build/github/azure-pipelines/stages/plugin-mac-webgpu-stage.yml @@ -156,6 +156,8 @@ stages: parameters: host_cpu_arch: 'arm64' + - template: ../templates/set-nightly-build-option-variable-step.yml + - template: ../templates/set-plugin-build-variables-step.yml parameters: package_version: ${{ parameters.package_version }} diff --git a/tools/ci_build/github/azure-pipelines/stages/plugin-win-webgpu-stage.yml b/tools/ci_build/github/azure-pipelines/stages/plugin-win-webgpu-stage.yml index d8d2afa12617f..572c078473eea 100644 --- a/tools/ci_build/github/azure-pipelines/stages/plugin-win-webgpu-stage.yml +++ b/tools/ci_build/github/azure-pipelines/stages/plugin-win-webgpu-stage.yml @@ -284,6 +284,8 @@ stages: parameters: host_cpu_arch: 'x64' + - template: ../templates/set-nightly-build-option-variable-step.yml + - template: 
../templates/set-plugin-build-variables-step.yml parameters: package_version: ${{ parameters.package_version }} From 907df11dcfbd9df8aa75f9e9748e3f4d221f53bc Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Wed, 15 Apr 2026 16:04:25 -0700 Subject: [PATCH 23/48] Fix auditwheel producing duplicate wheels Materialize the glob generator to a list so the emptiness check works, and delete each raw wheel after auditwheel repair so only the manylinux wheel remains. --- plugin-ep-webgpu/python/build_wheel.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/plugin-ep-webgpu/python/build_wheel.py b/plugin-ep-webgpu/python/build_wheel.py index 531b671065003..27d55b8c95e2d 100644 --- a/plugin-ep-webgpu/python/build_wheel.py +++ b/plugin-ep-webgpu/python/build_wheel.py @@ -95,17 +95,23 @@ def auditwheel_repair(wheel_dir: Path): if not raw_wheels: return + raw_wheel_list = list(raw_wheels) + if not raw_wheel_list: + return + with tempfile.TemporaryDirectory() as repaired_dir_name: repaired_dir = Path(repaired_dir_name) - for wheel in raw_wheels: + for wheel in raw_wheel_list: cmd = [sys.executable, "-m", "auditwheel", "repair", str(wheel), "--wheel-dir", str(repaired_dir)] for lib in AUDITWHEEL_EXCLUDE: cmd.extend(["--exclude", lib]) print(f"Running: {' '.join(cmd)}") subprocess.check_call(cmd) + # Remove the raw wheel so only the repaired one remains + wheel.unlink() - # Replace raw wheels with repaired ones + # Move repaired wheels into wheel_dir for repaired_wheel in repaired_dir.glob("*.whl"): repaired_wheel.replace(wheel_dir / repaired_wheel.name) From 8bf739ad282e8a8a8c6e621925ae114effb6b314 Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Wed, 15 Apr 2026 18:51:47 -0700 Subject: [PATCH 24/48] Add set-nightly-build-option-variable-step to Python_Package jobs Add the missing set-nightly-build-option-variable-step.yml template to all three platform Python_Package 
jobs for consistency with the Build jobs. --- .../github/azure-pipelines/stages/plugin-linux-webgpu-stage.yml | 2 ++ .../github/azure-pipelines/stages/plugin-mac-webgpu-stage.yml | 2 ++ .../github/azure-pipelines/stages/plugin-win-webgpu-stage.yml | 2 ++ 3 files changed, 6 insertions(+) diff --git a/tools/ci_build/github/azure-pipelines/stages/plugin-linux-webgpu-stage.yml b/tools/ci_build/github/azure-pipelines/stages/plugin-linux-webgpu-stage.yml index 802b47dd1e76b..d9c9932ed286f 100644 --- a/tools/ci_build/github/azure-pipelines/stages/plugin-linux-webgpu-stage.yml +++ b/tools/ci_build/github/azure-pipelines/stages/plugin-linux-webgpu-stage.yml @@ -120,6 +120,8 @@ stages: clean: true submodules: none + - template: ../templates/set-nightly-build-option-variable-step.yml + - template: ../templates/set-plugin-build-variables-step.yml parameters: package_version: ${{ parameters.package_version }} diff --git a/tools/ci_build/github/azure-pipelines/stages/plugin-mac-webgpu-stage.yml b/tools/ci_build/github/azure-pipelines/stages/plugin-mac-webgpu-stage.yml index f31c6bcf37fd6..4c7d855456e58 100644 --- a/tools/ci_build/github/azure-pipelines/stages/plugin-mac-webgpu-stage.yml +++ b/tools/ci_build/github/azure-pipelines/stages/plugin-mac-webgpu-stage.yml @@ -156,6 +156,8 @@ stages: parameters: host_cpu_arch: 'arm64' + - template: ../templates/set-nightly-build-option-variable-step.yml + - template: ../templates/set-plugin-build-variables-step.yml parameters: package_version: ${{ parameters.package_version }} diff --git a/tools/ci_build/github/azure-pipelines/stages/plugin-win-webgpu-stage.yml b/tools/ci_build/github/azure-pipelines/stages/plugin-win-webgpu-stage.yml index d8d2afa12617f..572c078473eea 100644 --- a/tools/ci_build/github/azure-pipelines/stages/plugin-win-webgpu-stage.yml +++ b/tools/ci_build/github/azure-pipelines/stages/plugin-win-webgpu-stage.yml @@ -284,6 +284,8 @@ stages: parameters: host_cpu_arch: 'x64' + - template: 
../templates/set-nightly-build-option-variable-step.yml + - template: ../templates/set-plugin-build-variables-step.yml parameters: package_version: ${{ parameters.package_version }} From c57b4ae05a21d14b4cfb833cf52e540b9e20671d Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Fri, 17 Apr 2026 08:49:26 -0700 Subject: [PATCH 25/48] add setup-feeds-and-python-steps template to get .npmrc set up --- .../github/azure-pipelines/stages/plugin-linux-webgpu-stage.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/ci_build/github/azure-pipelines/stages/plugin-linux-webgpu-stage.yml b/tools/ci_build/github/azure-pipelines/stages/plugin-linux-webgpu-stage.yml index d9c9932ed286f..1fa9c856a673b 100644 --- a/tools/ci_build/github/azure-pipelines/stages/plugin-linux-webgpu-stage.yml +++ b/tools/ci_build/github/azure-pipelines/stages/plugin-linux-webgpu-stage.yml @@ -53,6 +53,8 @@ stages: parameters: package_version: ${{ parameters.package_version }} + - template: ../templates/setup-feeds-and-python-steps.yml + - template: ../templates/get-docker-image-steps.yml parameters: Dockerfile: tools/ci_build/github/linux/docker/inference/x86_64/python/cuda/Dockerfile From 82639ce28ea3e3bad4fc85806cb5c443c51f1317 Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Tue, 21 Apr 2026 09:51:08 -0700 Subject: [PATCH 26/48] try adding setup-feed-and-python-steps.yml to Linux Python packaging stage. 
--- .../github/azure-pipelines/stages/plugin-linux-webgpu-stage.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/ci_build/github/azure-pipelines/stages/plugin-linux-webgpu-stage.yml b/tools/ci_build/github/azure-pipelines/stages/plugin-linux-webgpu-stage.yml index 1fa9c856a673b..eed8842ec0f14 100644 --- a/tools/ci_build/github/azure-pipelines/stages/plugin-linux-webgpu-stage.yml +++ b/tools/ci_build/github/azure-pipelines/stages/plugin-linux-webgpu-stage.yml @@ -128,6 +128,8 @@ stages: parameters: package_version: ${{ parameters.package_version }} + - template: ../templates/setup-feeds-and-python-steps.yml + - template: ../templates/get-docker-image-steps.yml parameters: Dockerfile: tools/ci_build/github/linux/docker/inference/x86_64/python/cuda/Dockerfile From 3b876ccdfc5de3175bdf00da16b1c04872c80451 Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Tue, 21 Apr 2026 11:30:53 -0700 Subject: [PATCH 27/48] pass through PIP_INDEX_URL env variable for docker run commands --- .../azure-pipelines/stages/plugin-linux-webgpu-stage.yml | 1 + .../github/linux/build_webgpu_plugin_python_package.sh | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/ci_build/github/azure-pipelines/stages/plugin-linux-webgpu-stage.yml b/tools/ci_build/github/azure-pipelines/stages/plugin-linux-webgpu-stage.yml index eed8842ec0f14..6786f9aff4946 100644 --- a/tools/ci_build/github/azure-pipelines/stages/plugin-linux-webgpu-stage.yml +++ b/tools/ci_build/github/azure-pipelines/stages/plugin-linux-webgpu-stage.yml @@ -182,6 +182,7 @@ stages: docker run --rm --gpus all \ --volume "$(Build.SourcesDirectory):/onnxruntime_src" \ --volume "$(Build.BinariesDirectory):/build" \ + --env "PIP_INDEX_URL=${PIP_INDEX_URL}" \ onnxruntimewebgpuplugin \ /bin/bash -c " set -e -x diff --git a/tools/ci_build/github/linux/build_webgpu_plugin_python_package.sh b/tools/ci_build/github/linux/build_webgpu_plugin_python_package.sh index 
fab2df9603417..317f537df020b 100755 --- a/tools/ci_build/github/linux/build_webgpu_plugin_python_package.sh +++ b/tools/ci_build/github/linux/build_webgpu_plugin_python_package.sh @@ -27,6 +27,7 @@ docker run --rm \ --volume "${BUILD_SOURCESDIRECTORY}:/onnxruntime_src" \ --volume "${BUILD_BINARIESDIRECTORY}:/build" \ --volume "${BUILD_ARTIFACTSTAGINGDIRECTORY}:/staging" \ + --env "PIP_INDEX_URL=${PIP_INDEX_URL}" \ "$DOCKER_IMAGE" \ /bin/bash -c " set -e -x @@ -34,6 +35,6 @@ docker run --rm \ python3 -m pip install -r /onnxruntime_src/plugin-ep-webgpu/python/requirements-build-wheel.txt python3 /onnxruntime_src/plugin-ep-webgpu/python/build_wheel.py \ --binary_dir /build/plugin_artifacts/bin \ - --version ${VERSION} \ + --version "${VERSION}" \ --output_dir /staging/python " From fec0eaf040ecebf25695a4c9019390ebeca61b8c Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Tue, 21 Apr 2026 13:03:24 -0700 Subject: [PATCH 28/48] increase build timeout --- .../azure-pipelines/stages/plugin-linux-webgpu-stage.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/ci_build/github/azure-pipelines/stages/plugin-linux-webgpu-stage.yml b/tools/ci_build/github/azure-pipelines/stages/plugin-linux-webgpu-stage.yml index 6786f9aff4946..9b6eb8b0f123c 100644 --- a/tools/ci_build/github/azure-pipelines/stages/plugin-linux-webgpu-stage.yml +++ b/tools/ci_build/github/azure-pipelines/stages/plugin-linux-webgpu-stage.yml @@ -104,7 +104,7 @@ stages: # Python package build job - job: Linux_plugin_webgpu_x64_Python_Package dependsOn: Linux_plugin_webgpu_x64_Build - timeoutInMinutes: 30 + timeoutInMinutes: 60 workspace: clean: all pool: @@ -153,7 +153,7 @@ stages: # Python package test job - job: Linux_plugin_webgpu_x64_Python_Test dependsOn: Linux_plugin_webgpu_x64_Python_Package - timeoutInMinutes: 30 + timeoutInMinutes: 60 workspace: clean: all pool: From 90aaf7cceef06d2d29deec7698d7aafacc2a755c Mon Sep 17 00:00:00 2001 
From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Tue, 21 Apr 2026 14:28:28 -0700 Subject: [PATCH 29/48] add another setup template --- .../github/azure-pipelines/stages/plugin-linux-webgpu-stage.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/ci_build/github/azure-pipelines/stages/plugin-linux-webgpu-stage.yml b/tools/ci_build/github/azure-pipelines/stages/plugin-linux-webgpu-stage.yml index 9b6eb8b0f123c..7ba9a31a651fd 100644 --- a/tools/ci_build/github/azure-pipelines/stages/plugin-linux-webgpu-stage.yml +++ b/tools/ci_build/github/azure-pipelines/stages/plugin-linux-webgpu-stage.yml @@ -164,6 +164,8 @@ stages: clean: true submodules: none + - template: ../templates/setup-feeds-and-python-steps.yml + - template: ../templates/get-docker-image-steps.yml parameters: Dockerfile: tools/ci_build/github/linux/docker/inference/x86_64/python/cuda/Dockerfile From ea448619d136f9870e5154c8fa2b3c0d32301f9f Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Tue, 21 Apr 2026 17:49:18 -0700 Subject: [PATCH 30/48] Add dedicated Dockerfile for Linux WebGPU EP plugin pipeline The WebGPU EP has no CUDA dependency (Dawn uses Vulkan on Linux), so having the plugin-linux-webgpu-stage.yml pipeline reuse the CUDA inference Dockerfile pulled in TensorRT/cuDNN unnecessarily and was missing libvulkan.so.1, causing the test job to fail with: Couldn't load Vulkan: libvulkan.so.1: cannot open shared object file Add a new Dockerfile under inference/x86_64/python/webgpu/ modeled on the CPU Dockerfile, based on the CPU build-cache image, with an additional 'dnf install vulkan-loader' step so Dawn can reach the GPU's Vulkan ICD (injected by the NVIDIA Container Toolkit via --gpus all at runtime). Update all three jobs (build, package, test) in plugin-linux-webgpu-stage.yml to use the new Dockerfile and switch the docker_base_image default to the CPU base image. 
--- .../stages/plugin-linux-webgpu-stage.yml | 14 +++++++------- .../inference/x86_64/python/webgpu/Dockerfile | 16 ++++++++++++++++ .../python/webgpu/scripts/install_centos.sh | 7 +++++++ 3 files changed, 30 insertions(+), 7 deletions(-) create mode 100644 tools/ci_build/github/linux/docker/inference/x86_64/python/webgpu/Dockerfile create mode 100755 tools/ci_build/github/linux/docker/inference/x86_64/python/webgpu/scripts/install_centos.sh diff --git a/tools/ci_build/github/azure-pipelines/stages/plugin-linux-webgpu-stage.yml b/tools/ci_build/github/azure-pipelines/stages/plugin-linux-webgpu-stage.yml index 7ba9a31a651fd..6c4ecd7abe559 100644 --- a/tools/ci_build/github/azure-pipelines/stages/plugin-linux-webgpu-stage.yml +++ b/tools/ci_build/github/azure-pipelines/stages/plugin-linux-webgpu-stage.yml @@ -22,7 +22,7 @@ parameters: - name: docker_base_image type: string - default: 'onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda12_x64_almalinux8_gcc14:20251017.1' + default: 'onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cpu_x64_almalinux8_gcc14:20251017.1' stages: - stage: Linux_plugin_webgpu_x64_Build @@ -57,8 +57,8 @@ stages: - template: ../templates/get-docker-image-steps.yml parameters: - Dockerfile: tools/ci_build/github/linux/docker/inference/x86_64/python/cuda/Dockerfile - Context: tools/ci_build/github/linux/docker/inference/x86_64/python/cuda + Dockerfile: tools/ci_build/github/linux/docker/inference/x86_64/python/webgpu/Dockerfile + Context: tools/ci_build/github/linux/docker/inference/x86_64/python/webgpu DockerBuildArgs: "--build-arg BASEIMAGE=${{ parameters.docker_base_image }} --build-arg BUILD_UID=$( id -u )" Repository: onnxruntimewebgpuplugin @@ -132,8 +132,8 @@ stages: - template: ../templates/get-docker-image-steps.yml parameters: - Dockerfile: tools/ci_build/github/linux/docker/inference/x86_64/python/cuda/Dockerfile - Context: tools/ci_build/github/linux/docker/inference/x86_64/python/cuda + 
Dockerfile: tools/ci_build/github/linux/docker/inference/x86_64/python/webgpu/Dockerfile + Context: tools/ci_build/github/linux/docker/inference/x86_64/python/webgpu DockerBuildArgs: "--build-arg BASEIMAGE=${{ parameters.docker_base_image }} --build-arg BUILD_UID=$( id -u )" Repository: onnxruntimewebgpuplugin @@ -168,8 +168,8 @@ stages: - template: ../templates/get-docker-image-steps.yml parameters: - Dockerfile: tools/ci_build/github/linux/docker/inference/x86_64/python/cuda/Dockerfile - Context: tools/ci_build/github/linux/docker/inference/x86_64/python/cuda + Dockerfile: tools/ci_build/github/linux/docker/inference/x86_64/python/webgpu/Dockerfile + Context: tools/ci_build/github/linux/docker/inference/x86_64/python/webgpu DockerBuildArgs: "--build-arg BASEIMAGE=${{ parameters.docker_base_image }} --build-arg BUILD_UID=$( id -u )" Repository: onnxruntimewebgpuplugin diff --git a/tools/ci_build/github/linux/docker/inference/x86_64/python/webgpu/Dockerfile b/tools/ci_build/github/linux/docker/inference/x86_64/python/webgpu/Dockerfile new file mode 100644 index 0000000000000..1327bcbd3740e --- /dev/null +++ b/tools/ci_build/github/linux/docker/inference/x86_64/python/webgpu/Dockerfile @@ -0,0 +1,16 @@ +ARG BASEIMAGE=onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cpu_x64_almalinux8_gcc14:20251017.1 +FROM $BASEIMAGE + +ADD scripts /tmp/scripts +RUN cd /tmp/scripts && /tmp/scripts/install_centos.sh && rm -rf /tmp/scripts + +# Install Vulkan loader so Dawn/WebGPU can access the GPU at runtime. +# The NVIDIA Container Toolkit injects the Vulkan ICD driver when the +# container is run with --gpus all. 
+RUN dnf install -y vulkan-loader && dnf clean all + +ARG BUILD_UID=1001 +ARG BUILD_USER=onnxruntimedev +RUN adduser --uid $BUILD_UID $BUILD_USER +WORKDIR /home/$BUILD_USER +USER $BUILD_USER diff --git a/tools/ci_build/github/linux/docker/inference/x86_64/python/webgpu/scripts/install_centos.sh b/tools/ci_build/github/linux/docker/inference/x86_64/python/webgpu/scripts/install_centos.sh new file mode 100755 index 0000000000000..1ced7cd2f90c8 --- /dev/null +++ b/tools/ci_build/github/linux/docker/inference/x86_64/python/webgpu/scripts/install_centos.sh @@ -0,0 +1,7 @@ +#!/bin/bash +set -e + +os_major_version=$(tr -dc '0-9.' < /etc/redhat-release |cut -d \. -f1) + +echo "installing for os major version : $os_major_version" +dnf install -y glibc-langpack-\* which redhat-lsb-core expat-devel tar unzip zlib-devel make bzip2 bzip2-devel perl-IPC-Cmd openssl-devel wget From 30990dea1cf5047f9890831fc7bb3ee2ebccc878 Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Tue, 21 Apr 2026 19:15:53 -0700 Subject: [PATCH 31/48] Enable NVIDIA graphics capability and add Vulkan diagnostics The WebGPU plugin's Python test job was failing with: setup_loader_term_phys_devs: Failed to detect any valid GPUs in the current config The NVIDIA Container Toolkit only injects the CUDA portions of the driver by default (capabilities = utility,compute). Vulkan additionally requires the 'graphics' capability, which injects libGLX_nvidia, the NVIDIA Vulkan ICD JSON, and associated userspace libs. - Set NVIDIA_DRIVER_CAPABILITIES=compute,utility,graphics and NVIDIA_VISIBLE_DEVICES=all in the WebGPU plugin image so every 'docker run --gpus all' gets a working Vulkan ICD. - Install vulkan-tools in the image for diagnostic purposes (to be removed once the test is stable). 
- Add a temporary diagnostics block to the Python test job that dumps the relevant NVIDIA/Vulkan state (env vars, nvidia-smi, ICD search paths, libnvidia/libGLX_nvidia locations, vulkaninfo --summary) before running the actual test. --- .../stages/plugin-linux-webgpu-stage.yml | 25 +++++++++++++++++++ .../inference/x86_64/python/webgpu/Dockerfile | 13 +++++++++- 2 files changed, 37 insertions(+), 1 deletion(-) diff --git a/tools/ci_build/github/azure-pipelines/stages/plugin-linux-webgpu-stage.yml b/tools/ci_build/github/azure-pipelines/stages/plugin-linux-webgpu-stage.yml index 6c4ecd7abe559..8343a13249318 100644 --- a/tools/ci_build/github/azure-pipelines/stages/plugin-linux-webgpu-stage.yml +++ b/tools/ci_build/github/azure-pipelines/stages/plugin-linux-webgpu-stage.yml @@ -188,6 +188,31 @@ stages: onnxruntimewebgpuplugin \ /bin/bash -c " set -e -x + # --- DIAGNOSTICS (remove once WebGPU EP test is stable) --- + echo '=== NVIDIA driver capabilities / visible devices ===' + echo \"NVIDIA_DRIVER_CAPABILITIES=\${NVIDIA_DRIVER_CAPABILITIES:-}\" + echo \"NVIDIA_VISIBLE_DEVICES=\${NVIDIA_VISIBLE_DEVICES:-}\" + echo '=== nvidia-smi ===' + nvidia-smi || echo 'nvidia-smi failed' + echo '=== Vulkan ICD search paths ===' + ls -la /usr/share/vulkan/icd.d/ 2>/dev/null || echo ' (no /usr/share/vulkan/icd.d/)' + ls -la /etc/vulkan/icd.d/ 2>/dev/null || echo ' (no /etc/vulkan/icd.d/)' + ls -la /etc/glvnd/egl_vendor.d/ 2>/dev/null || echo ' (no /etc/glvnd/egl_vendor.d/)' + echo '=== any *_icd.json on the filesystem ===' + find / -name '*_icd.json' 2>/dev/null || true + echo '=== NVIDIA libs (libGLX_nvidia / libnvidia*) ===' + find / \( -name 'libGLX_nvidia*' -o -name 'libnvidia-*' \) 2>/dev/null | head -30 || true + echo '=== libvulkan ===' + ldconfig -p | grep -i vulkan || echo ' (no libvulkan in ldconfig cache)' + echo '=== vulkaninfo (if present) ===' + if command -v vulkaninfo >/dev/null 2>&1; then + VK_LOADER_DEBUG=error,warn vulkaninfo --summary 2>&1 | head -60 || true + 
else + echo ' (vulkaninfo not installed)' + fi + echo '=== END DIAGNOSTICS ===' + # --- END DIAGNOSTICS --- + python3 -m venv /build/test_venv source /build/test_venv/bin/activate python3 -m pip install onnxruntime onnx numpy diff --git a/tools/ci_build/github/linux/docker/inference/x86_64/python/webgpu/Dockerfile b/tools/ci_build/github/linux/docker/inference/x86_64/python/webgpu/Dockerfile index 1327bcbd3740e..e90d6f293e90a 100644 --- a/tools/ci_build/github/linux/docker/inference/x86_64/python/webgpu/Dockerfile +++ b/tools/ci_build/github/linux/docker/inference/x86_64/python/webgpu/Dockerfile @@ -7,7 +7,18 @@ RUN cd /tmp/scripts && /tmp/scripts/install_centos.sh && rm -rf /tmp/scripts # Install Vulkan loader so Dawn/WebGPU can access the GPU at runtime. # The NVIDIA Container Toolkit injects the Vulkan ICD driver when the # container is run with --gpus all. -RUN dnf install -y vulkan-loader && dnf clean all +# vulkan-tools is included for diagnostic purposes (vulkaninfo). +# TODO: remove vulkan-tools once the WebGPU EP test is stable in CI. +RUN dnf install -y vulkan-loader vulkan-tools && dnf clean all + +# Tell the NVIDIA Container Toolkit to inject the graphics/Vulkan driver +# in addition to the default compute/utility. Without "graphics", the +# NVIDIA Vulkan ICD and userspace driver are not made available inside +# the container, which produces: +# setup_loader_term_phys_devs: Failed to detect any valid GPUs in the current config +# when Dawn/WebGPU tries to enumerate Vulkan physical devices. 
+ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility,graphics +ENV NVIDIA_VISIBLE_DEVICES=all ARG BUILD_UID=1001 ARG BUILD_USER=onnxruntimedev From 72cb0426bfa166a1c050a5a2ace8f202752c68ed Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Wed, 22 Apr 2026 10:13:49 -0700 Subject: [PATCH 32/48] Use Mesa lavapipe software Vulkan for WebGPU EP plugin Linux test The NVIDIA Container Toolkit on the GPU CI pool injects NVIDIA libraries but not the Vulkan ICD JSON, so Dawn fails with 'Failed to detect any valid GPUs'. Switch the plugin Python test job to Mesa lavapipe (a CPU Vulkan implementation) instead. This unblocks CI and lets the job run on the standard CPU pool. - Dockerfile: add mesa-vulkan-drivers (provides lavapipe). Drop NVIDIA env vars. Leave VK_ICD_FILENAMES to the caller so the image stays reusable for a future real-GPU test job. - plugin-linux-webgpu-stage.yml: switch test job to the CPU pool, drop --gpus all, and set VK_ICD_FILENAMES / VK_DRIVER_FILES to the lavapipe ICD on the docker run command line. Trim diagnostics. --- .../stages/plugin-linux-webgpu-stage.yml | 39 ++++++++----------- .../inference/x86_64/python/webgpu/Dockerfile | 24 +++++------- 2 files changed, 26 insertions(+), 37 deletions(-) diff --git a/tools/ci_build/github/azure-pipelines/stages/plugin-linux-webgpu-stage.yml b/tools/ci_build/github/azure-pipelines/stages/plugin-linux-webgpu-stage.yml index 8343a13249318..b499f6fdb2896 100644 --- a/tools/ci_build/github/azure-pipelines/stages/plugin-linux-webgpu-stage.yml +++ b/tools/ci_build/github/azure-pipelines/stages/plugin-linux-webgpu-stage.yml @@ -151,13 +151,17 @@ stages: displayName: 'Build Python wheel' # Python package test job + # + # This job runs against a software Vulkan implementation (Mesa lavapipe) + # baked into the Docker image. It does not require a GPU agent, so it + # uses the standard CPU pool. See the Dockerfile for VK_ICD_FILENAMES. 
- job: Linux_plugin_webgpu_x64_Python_Test dependsOn: Linux_plugin_webgpu_x64_Python_Package timeoutInMinutes: 60 workspace: clean: all pool: - name: ${{ parameters.gpu_machine_pool }} + name: ${{ parameters.machine_pool }} os: linux steps: - checkout: self @@ -181,35 +185,26 @@ stages: - script: | set -e -x - docker run --rm --gpus all \ + # Pin Vulkan to Mesa lavapipe (software Vulkan) so the test does not + # require a GPU agent. Keeping these env vars at `docker run` time + # (rather than baking them into the image) leaves the image reusable + # for a potential future real-GPU test job. + lavapipe_icd=/usr/share/vulkan/icd.d/lvp_icd.x86_64.json + docker run --rm \ --volume "$(Build.SourcesDirectory):/onnxruntime_src" \ --volume "$(Build.BinariesDirectory):/build" \ --env "PIP_INDEX_URL=${PIP_INDEX_URL}" \ + --env "VK_ICD_FILENAMES=${lavapipe_icd}" \ + --env "VK_DRIVER_FILES=${lavapipe_icd}" \ onnxruntimewebgpuplugin \ /bin/bash -c " set -e -x # --- DIAGNOSTICS (remove once WebGPU EP test is stable) --- - echo '=== NVIDIA driver capabilities / visible devices ===' - echo \"NVIDIA_DRIVER_CAPABILITIES=\${NVIDIA_DRIVER_CAPABILITIES:-}\" - echo \"NVIDIA_VISIBLE_DEVICES=\${NVIDIA_VISIBLE_DEVICES:-}\" - echo '=== nvidia-smi ===' - nvidia-smi || echo 'nvidia-smi failed' - echo '=== Vulkan ICD search paths ===' + echo '=== Vulkan ICD configuration ===' + echo \"VK_ICD_FILENAMES=\${VK_ICD_FILENAMES:-}\" ls -la /usr/share/vulkan/icd.d/ 2>/dev/null || echo ' (no /usr/share/vulkan/icd.d/)' - ls -la /etc/vulkan/icd.d/ 2>/dev/null || echo ' (no /etc/vulkan/icd.d/)' - ls -la /etc/glvnd/egl_vendor.d/ 2>/dev/null || echo ' (no /etc/glvnd/egl_vendor.d/)' - echo '=== any *_icd.json on the filesystem ===' - find / -name '*_icd.json' 2>/dev/null || true - echo '=== NVIDIA libs (libGLX_nvidia / libnvidia*) ===' - find / \( -name 'libGLX_nvidia*' -o -name 'libnvidia-*' \) 2>/dev/null | head -30 || true - echo '=== libvulkan ===' - ldconfig -p | grep -i vulkan || echo ' (no libvulkan 
in ldconfig cache)' - echo '=== vulkaninfo (if present) ===' - if command -v vulkaninfo >/dev/null 2>&1; then - VK_LOADER_DEBUG=error,warn vulkaninfo --summary 2>&1 | head -60 || true - else - echo ' (vulkaninfo not installed)' - fi + echo '=== vulkaninfo --summary (lavapipe expected) ===' + VK_LOADER_DEBUG=error,warn vulkaninfo --summary 2>&1 | head -60 || true echo '=== END DIAGNOSTICS ===' # --- END DIAGNOSTICS --- diff --git a/tools/ci_build/github/linux/docker/inference/x86_64/python/webgpu/Dockerfile b/tools/ci_build/github/linux/docker/inference/x86_64/python/webgpu/Dockerfile index e90d6f293e90a..2bf475f43e510 100644 --- a/tools/ci_build/github/linux/docker/inference/x86_64/python/webgpu/Dockerfile +++ b/tools/ci_build/github/linux/docker/inference/x86_64/python/webgpu/Dockerfile @@ -4,21 +4,15 @@ FROM $BASEIMAGE ADD scripts /tmp/scripts RUN cd /tmp/scripts && /tmp/scripts/install_centos.sh && rm -rf /tmp/scripts -# Install Vulkan loader so Dawn/WebGPU can access the GPU at runtime. -# The NVIDIA Container Toolkit injects the Vulkan ICD driver when the -# container is run with --gpus all. -# vulkan-tools is included for diagnostic purposes (vulkaninfo). -# TODO: remove vulkan-tools once the WebGPU EP test is stable in CI. -RUN dnf install -y vulkan-loader vulkan-tools && dnf clean all - -# Tell the NVIDIA Container Toolkit to inject the graphics/Vulkan driver -# in addition to the default compute/utility. Without "graphics", the -# NVIDIA Vulkan ICD and userspace driver are not made available inside -# the container, which produces: -# setup_loader_term_phys_devs: Failed to detect any valid GPUs in the current config -# when Dawn/WebGPU tries to enumerate Vulkan physical devices. -ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility,graphics -ENV NVIDIA_VISIBLE_DEVICES=all +# Install the Vulkan loader plus Mesa's lavapipe ICD (a software/CPU +# Vulkan implementation based on LLVMpipe). 
This lets Dawn/WebGPU run on +# CI agents that do not expose a GPU to the container. Callers that want +# to pin Vulkan to lavapipe should set VK_ICD_FILENAMES / VK_DRIVER_FILES +# at `docker run` time (see plugin-linux-webgpu-stage.yml). +# +# vulkan-tools is included for diagnostic purposes (vulkaninfo). It can +# be removed once the WebGPU EP test is stable. +RUN dnf install -y vulkan-loader mesa-vulkan-drivers vulkan-tools && dnf clean all ARG BUILD_UID=1001 ARG BUILD_USER=onnxruntimedev From 34a127ae6da4c51067a07cd5f27ef3c6ab8d1c80 Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Wed, 22 Apr 2026 11:38:31 -0700 Subject: [PATCH 33/48] Split WebGPU plugin EP test stages into separate pipeline Move the Python package test jobs out of the packaging pipeline (plugin-webgpu-pipeline.yml) into a new resource-triggered pipeline (plugin-webgpu-test-pipeline.yml), mirroring the py-packaging-pipeline / py-package-test-pipeline split. The test pipeline consumes artifacts from the packaging pipeline run that triggered it (or a run selected at queue time), so the test side (Dockerfile, Vulkan setup, test script) can be iterated on without rebuilding Dawn/WebGPU from source. - New stages/plugin-{linux,win,mac}-webgpu-test-stage.yml with the test jobs, downloading the wheel artifact from the 'build' pipeline resource. - Corresponding test jobs removed from stages/plugin-{linux,win,mac}-webgpu-stage.yml. - New top-level plugin-webgpu-test-pipeline.yml wires the platform test stages together and declares the packaging pipeline as a resource trigger. 
--- .../plugin-webgpu-pipeline.yml | 5 ++ .../plugin-webgpu-test-pipeline.yml | 53 ++++++++++++ .../stages/plugin-linux-webgpu-stage.yml | 67 --------------- .../stages/plugin-linux-webgpu-test-stage.yml | 86 +++++++++++++++++++ .../stages/plugin-mac-webgpu-stage.yml | 36 -------- .../stages/plugin-mac-webgpu-test-stage.yml | 37 ++++++++ .../stages/plugin-win-webgpu-stage.yml | 51 ----------- .../stages/plugin-win-webgpu-test-stage.yml | 60 +++++++++++++ 8 files changed, 241 insertions(+), 154 deletions(-) create mode 100644 tools/ci_build/github/azure-pipelines/plugin-webgpu-test-pipeline.yml create mode 100644 tools/ci_build/github/azure-pipelines/stages/plugin-linux-webgpu-test-stage.yml create mode 100644 tools/ci_build/github/azure-pipelines/stages/plugin-mac-webgpu-test-stage.yml create mode 100644 tools/ci_build/github/azure-pipelines/stages/plugin-win-webgpu-test-stage.yml diff --git a/tools/ci_build/github/azure-pipelines/plugin-webgpu-pipeline.yml b/tools/ci_build/github/azure-pipelines/plugin-webgpu-pipeline.yml index a9cfc2139fb95..6c3d74ae19878 100644 --- a/tools/ci_build/github/azure-pipelines/plugin-webgpu-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/plugin-webgpu-pipeline.yml @@ -1,5 +1,10 @@ trigger: none +# Packaging pipeline for the WebGPU EP plugin. This pipeline only builds +# and publishes artifacts. Tests that consume those artifacts live in +# plugin-webgpu-test-pipeline.yml, which is resource-triggered on +# successful runs of this pipeline. + resources: repositories: - repository: 1esPipelines diff --git a/tools/ci_build/github/azure-pipelines/plugin-webgpu-test-pipeline.yml b/tools/ci_build/github/azure-pipelines/plugin-webgpu-test-pipeline.yml new file mode 100644 index 0000000000000..79e7702a66f54 --- /dev/null +++ b/tools/ci_build/github/azure-pipelines/plugin-webgpu-test-pipeline.yml @@ -0,0 +1,53 @@ +trigger: none + +# This pipeline runs tests against artifacts produced by the WebGPU +# plugin packaging pipeline. 
It is resource-triggered on successful +# packaging runs and can also be queued manually against any prior +# packaging run. +# +# Split from the packaging pipeline so the test side (Dockerfile, Vulkan +# configuration, test scripts) can be iterated on without rebuilding +# Dawn/WebGPU from source. + +resources: + pipelines: + - pipeline: build + source: 'WebGPU Plugin EP Packaging Pipeline' + trigger: + branches: + include: + - main + +parameters: +- name: test_windows_x64 + displayName: 'Test Windows x64' + type: boolean + default: true + +# Note: Windows ARM64 is not tested here because the test runs on an x64 +# build agent, which cannot execute ARM64 binaries. + +- name: test_linux_x64 + displayName: 'Test Linux x64' + type: boolean + default: true + +- name: test_macos_arm64 + displayName: 'Test macOS ARM64' + type: boolean + default: true + +stages: + # Windows x64 + - ${{ if eq(parameters.test_windows_x64, true) }}: + - template: stages/plugin-win-webgpu-test-stage.yml + parameters: + arch: 'x64' + + # Linux x64 + - ${{ if eq(parameters.test_linux_x64, true) }}: + - template: stages/plugin-linux-webgpu-test-stage.yml + + # macOS ARM64 + - ${{ if eq(parameters.test_macos_arm64, true) }}: + - template: stages/plugin-mac-webgpu-test-stage.yml diff --git a/tools/ci_build/github/azure-pipelines/stages/plugin-linux-webgpu-stage.yml b/tools/ci_build/github/azure-pipelines/stages/plugin-linux-webgpu-stage.yml index b499f6fdb2896..e2afa738b2ac7 100644 --- a/tools/ci_build/github/azure-pipelines/stages/plugin-linux-webgpu-stage.yml +++ b/tools/ci_build/github/azure-pipelines/stages/plugin-linux-webgpu-stage.yml @@ -149,70 +149,3 @@ stages: -i onnxruntimewebgpuplugin \ -v "$(PluginPythonPackageVersion)" displayName: 'Build Python wheel' - - # Python package test job - # - # This job runs against a software Vulkan implementation (Mesa lavapipe) - # baked into the Docker image. It does not require a GPU agent, so it - # uses the standard CPU pool. 
See the Dockerfile for VK_ICD_FILENAMES. - - job: Linux_plugin_webgpu_x64_Python_Test - dependsOn: Linux_plugin_webgpu_x64_Python_Package - timeoutInMinutes: 60 - workspace: - clean: all - pool: - name: ${{ parameters.machine_pool }} - os: linux - steps: - - checkout: self - clean: true - submodules: none - - - template: ../templates/setup-feeds-and-python-steps.yml - - - template: ../templates/get-docker-image-steps.yml - parameters: - Dockerfile: tools/ci_build/github/linux/docker/inference/x86_64/python/webgpu/Dockerfile - Context: tools/ci_build/github/linux/docker/inference/x86_64/python/webgpu - DockerBuildArgs: "--build-arg BASEIMAGE=${{ parameters.docker_base_image }} --build-arg BUILD_UID=$( id -u )" - Repository: onnxruntimewebgpuplugin - - - task: DownloadPipelineArtifact@2 - displayName: 'Download Python wheel' - inputs: - artifactName: webgpu_plugin_python_linux_x64 - targetPath: '$(Build.BinariesDirectory)/python_wheel' - - - script: | - set -e -x - # Pin Vulkan to Mesa lavapipe (software Vulkan) so the test does not - # require a GPU agent. Keeping these env vars at `docker run` time - # (rather than baking them into the image) leaves the image reusable - # for a potential future real-GPU test job. 
- lavapipe_icd=/usr/share/vulkan/icd.d/lvp_icd.x86_64.json - docker run --rm \ - --volume "$(Build.SourcesDirectory):/onnxruntime_src" \ - --volume "$(Build.BinariesDirectory):/build" \ - --env "PIP_INDEX_URL=${PIP_INDEX_URL}" \ - --env "VK_ICD_FILENAMES=${lavapipe_icd}" \ - --env "VK_DRIVER_FILES=${lavapipe_icd}" \ - onnxruntimewebgpuplugin \ - /bin/bash -c " - set -e -x - # --- DIAGNOSTICS (remove once WebGPU EP test is stable) --- - echo '=== Vulkan ICD configuration ===' - echo \"VK_ICD_FILENAMES=\${VK_ICD_FILENAMES:-}\" - ls -la /usr/share/vulkan/icd.d/ 2>/dev/null || echo ' (no /usr/share/vulkan/icd.d/)' - echo '=== vulkaninfo --summary (lavapipe expected) ===' - VK_LOADER_DEBUG=error,warn vulkaninfo --summary 2>&1 | head -60 || true - echo '=== END DIAGNOSTICS ===' - # --- END DIAGNOSTICS --- - - python3 -m venv /build/test_venv - source /build/test_venv/bin/activate - python3 -m pip install onnxruntime onnx numpy - wheel=\$(find /build/python_wheel -name 'onnxruntime_ep_webgpu-*.whl' | head -1) - python3 -m pip install \"\$wheel\" - python3 -u /onnxruntime_src/plugin-ep-webgpu/python/test/test_webgpu_plugin_ep.py - " - displayName: 'Install and test Python package' diff --git a/tools/ci_build/github/azure-pipelines/stages/plugin-linux-webgpu-test-stage.yml b/tools/ci_build/github/azure-pipelines/stages/plugin-linux-webgpu-test-stage.yml new file mode 100644 index 0000000000000..4c8544dcf3487 --- /dev/null +++ b/tools/ci_build/github/azure-pipelines/stages/plugin-linux-webgpu-test-stage.yml @@ -0,0 +1,86 @@ +parameters: +- name: machine_pool + type: string + default: 'onnxruntime-Ubuntu2404-AMD-CPU' + +- name: docker_base_image + type: string + default: 'onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cpu_x64_almalinux8_gcc14:20251017.1' + +stages: +# Test stage. +# +# This stage runs against a software Vulkan implementation (Mesa lavapipe) +# installed in the Docker image. 
It does not require a GPU agent, so it +# uses the standard CPU pool. The ICD selection is pinned at `docker run` +# time via VK_ICD_FILENAMES / VK_DRIVER_FILES (see below) so the image +# remains reusable for a potential future real-GPU test job. +- stage: Linux_plugin_webgpu_x64_Test + dependsOn: [] + jobs: + - job: Linux_plugin_webgpu_x64_Python_Test + timeoutInMinutes: 60 + workspace: + clean: all + pool: + name: ${{ parameters.machine_pool }} + os: linux + steps: + - checkout: self + clean: true + submodules: none + + - template: ../templates/setup-feeds-and-python-steps.yml + + - template: ../templates/get-docker-image-steps.yml + parameters: + Dockerfile: tools/ci_build/github/linux/docker/inference/x86_64/python/webgpu/Dockerfile + Context: tools/ci_build/github/linux/docker/inference/x86_64/python/webgpu + DockerBuildArgs: "--build-arg BASEIMAGE=${{ parameters.docker_base_image }} --build-arg BUILD_UID=$( id -u )" + Repository: onnxruntimewebgpuplugin + + # Download the Python wheel produced by the packaging pipeline run that + # triggered this pipeline (or that was selected at queue time). + - download: build + artifact: webgpu_plugin_python_linux_x64 + displayName: 'Download Python wheel' + + - script: | + set -e -x + mkdir -p "$(Build.BinariesDirectory)/python_wheel" + cp -R "$(Pipeline.Workspace)/build/webgpu_plugin_python_linux_x64/"* "$(Build.BinariesDirectory)/python_wheel/" + displayName: 'Stage Python wheel for test container' + + - script: | + set -e -x + # Pin Vulkan to Mesa lavapipe (software Vulkan) so the test does not + # require a GPU agent. Keeping these env vars at `docker run` time + # (rather than baking them into the image) leaves the image reusable + # for a potential future real-GPU test job. 
+ lavapipe_icd=/usr/share/vulkan/icd.d/lvp_icd.x86_64.json + docker run --rm \ + --volume "$(Build.SourcesDirectory):/onnxruntime_src" \ + --volume "$(Build.BinariesDirectory):/build" \ + --env "PIP_INDEX_URL=${PIP_INDEX_URL}" \ + --env "VK_ICD_FILENAMES=${lavapipe_icd}" \ + --env "VK_DRIVER_FILES=${lavapipe_icd}" \ + onnxruntimewebgpuplugin \ + /bin/bash -c " + set -e -x + # --- DIAGNOSTICS (remove once WebGPU EP test is stable) --- + echo '=== Vulkan ICD configuration ===' + echo \"VK_ICD_FILENAMES=\${VK_ICD_FILENAMES:-}\" + ls -la /usr/share/vulkan/icd.d/ 2>/dev/null || echo ' (no /usr/share/vulkan/icd.d/)' + echo '=== vulkaninfo --summary (lavapipe expected) ===' + VK_LOADER_DEBUG=error,warn vulkaninfo --summary 2>&1 | head -60 || true + echo '=== END DIAGNOSTICS ===' + # --- END DIAGNOSTICS --- + + python3 -m venv /build/test_venv + source /build/test_venv/bin/activate + python3 -m pip install onnxruntime onnx numpy + wheel=\$(find /build/python_wheel -name 'onnxruntime_ep_webgpu-*.whl' | head -1) + python3 -m pip install \"\$wheel\" + python3 -u /onnxruntime_src/plugin-ep-webgpu/python/test/test_webgpu_plugin_ep.py + " + displayName: 'Install and test Python package' diff --git a/tools/ci_build/github/azure-pipelines/stages/plugin-mac-webgpu-stage.yml b/tools/ci_build/github/azure-pipelines/stages/plugin-mac-webgpu-stage.yml index 4c7d855456e58..be6f997f15b55 100644 --- a/tools/ci_build/github/azure-pipelines/stages/plugin-mac-webgpu-stage.yml +++ b/tools/ci_build/github/azure-pipelines/stages/plugin-mac-webgpu-stage.yml @@ -176,39 +176,3 @@ stages: --version "$(PluginPythonPackageVersion)" \ --output_dir "$(Build.ArtifactStagingDirectory)/python" displayName: 'Build Python wheel' - - # Python package test job - - job: MacOS_plugin_webgpu_arm64_Python_Test - dependsOn: MacOS_plugin_webgpu_arm64_Python_Package - timeoutInMinutes: 30 - workspace: - clean: all - pool: - name: AcesShared - os: macOS - demands: - - ImageOverride -equals ACES_VM_SharedPool_Sequoia 
- steps: - - checkout: self - clean: true - submodules: none - - - template: ../templates/setup-build-tools.yml - parameters: - host_cpu_arch: 'arm64' - - - task: DownloadPipelineArtifact@2 - displayName: 'Download Python wheel' - inputs: - artifactName: webgpu_plugin_python_macos_arm64 - targetPath: '$(Build.BinariesDirectory)/python_wheel' - - - script: | - set -e -x - python3 -m venv "$(Build.BinariesDirectory)/test_venv" - source "$(Build.BinariesDirectory)/test_venv/bin/activate" - python3 -m pip install onnxruntime onnx numpy - wheel=$(find "$(Build.BinariesDirectory)/python_wheel" -name "onnxruntime_ep_webgpu-*.whl" | head -1) - python3 -m pip install "$wheel" - python3 -u "$(Build.SourcesDirectory)/plugin-ep-webgpu/python/test/test_webgpu_plugin_ep.py" - displayName: 'Install and test Python package' diff --git a/tools/ci_build/github/azure-pipelines/stages/plugin-mac-webgpu-test-stage.yml b/tools/ci_build/github/azure-pipelines/stages/plugin-mac-webgpu-test-stage.yml new file mode 100644 index 0000000000000..3f0df195f2bdc --- /dev/null +++ b/tools/ci_build/github/azure-pipelines/stages/plugin-mac-webgpu-test-stage.yml @@ -0,0 +1,37 @@ +stages: +- stage: MacOS_plugin_webgpu_arm64_Test + dependsOn: [] + jobs: + - job: MacOS_plugin_webgpu_arm64_Python_Test + timeoutInMinutes: 30 + workspace: + clean: all + pool: + name: AcesShared + os: macOS + demands: + - ImageOverride -equals ACES_VM_SharedPool_Sequoia + steps: + - checkout: self + clean: true + submodules: none + + - template: ../templates/setup-build-tools.yml + parameters: + host_cpu_arch: 'arm64' + + # Download the Python wheel produced by the packaging pipeline run that + # triggered this pipeline (or that was selected at queue time). 
+ - download: build + artifact: webgpu_plugin_python_macos_arm64 + displayName: 'Download Python wheel' + + - script: | + set -e -x + python3 -m venv "$(Build.BinariesDirectory)/test_venv" + source "$(Build.BinariesDirectory)/test_venv/bin/activate" + python3 -m pip install onnxruntime onnx numpy + wheel=$(find "$(Pipeline.Workspace)/build/webgpu_plugin_python_macos_arm64" -name "onnxruntime_ep_webgpu-*.whl" | head -1) + python3 -m pip install "$wheel" + python3 -u "$(Build.SourcesDirectory)/plugin-ep-webgpu/python/test/test_webgpu_plugin_ep.py" + displayName: 'Install and test Python package' diff --git a/tools/ci_build/github/azure-pipelines/stages/plugin-win-webgpu-stage.yml b/tools/ci_build/github/azure-pipelines/stages/plugin-win-webgpu-stage.yml index 572c078473eea..7eba2386b32f1 100644 --- a/tools/ci_build/github/azure-pipelines/stages/plugin-win-webgpu-stage.yml +++ b/tools/ci_build/github/azure-pipelines/stages/plugin-win-webgpu-stage.yml @@ -307,54 +307,3 @@ stages: --binary_dir "$(Build.BinariesDirectory)\plugin_artifacts\bin" ` --version "$(PluginPythonPackageVersion)" ` --output_dir "$(Build.ArtifactStagingDirectory)\python" - - # Python package test job - - job: Win_plugin_webgpu_${{ parameters.arch }}_Python_Test - dependsOn: Win_plugin_webgpu_${{ parameters.arch }}_Python_Package - timeoutInMinutes: 30 - workspace: - clean: all - pool: - name: onnxruntime-Win2022-VS2022-webgpu-A10 - os: windows - steps: - - checkout: self - clean: true - submodules: none - - - template: ../templates/setup-build-tools.yml - parameters: - host_cpu_arch: 'x64' - - - task: DownloadPipelineArtifact@2 - displayName: 'Download Python wheel' - inputs: - artifactName: webgpu_plugin_python_win_${{ parameters.arch }} - targetPath: '$(Build.BinariesDirectory)\python_wheel' - - - task: PowerShell@2 - displayName: 'Install and test Python package' - inputs: - targetType: inline - pwsh: true - script: | - $ErrorActionPreference = 'Stop' - - echo "creating test_venv" - python -m 
venv "$(Build.BinariesDirectory)\test_venv" - - echo "activating test_venv" - & "$(Build.BinariesDirectory)\test_venv\Scripts\Activate.ps1" - - echo "installing onnxruntime onnx numpy" - python -m pip install onnxruntime onnx numpy - if ($LASTEXITCODE -ne 0) { throw "pip install onnxruntime onnx numpy failed with exit code $LASTEXITCODE" } - - $wheel = (Get-ChildItem "$(Build.BinariesDirectory)\python_wheel\onnxruntime_ep_webgpu-*.whl")[0] - echo "installing ${wheel}" - python -m pip install $wheel.FullName - if ($LASTEXITCODE -ne 0) { throw "pip install wheel failed with exit code $LASTEXITCODE" } - - echo "running test_webgpu_plugin_ep.py" - python -u "$(Build.SourcesDirectory)\plugin-ep-webgpu\python\test\test_webgpu_plugin_ep.py" - if ($LASTEXITCODE -ne 0) { throw "test_webgpu_plugin_ep.py failed with exit code $LASTEXITCODE (0x$($LASTEXITCODE.ToString('X')))" } diff --git a/tools/ci_build/github/azure-pipelines/stages/plugin-win-webgpu-test-stage.yml b/tools/ci_build/github/azure-pipelines/stages/plugin-win-webgpu-test-stage.yml new file mode 100644 index 0000000000000..54169b7b13029 --- /dev/null +++ b/tools/ci_build/github/azure-pipelines/stages/plugin-win-webgpu-test-stage.yml @@ -0,0 +1,60 @@ +parameters: +- name: arch + type: string + values: + - x64 + - arm64 + +stages: +- stage: Win_plugin_webgpu_${{ parameters.arch }}_Test + dependsOn: [] + jobs: + - job: Win_plugin_webgpu_${{ parameters.arch }}_Python_Test + timeoutInMinutes: 30 + workspace: + clean: all + pool: + name: onnxruntime-Win2022-VS2022-webgpu-A10 + os: windows + steps: + - checkout: self + clean: true + submodules: none + + - template: ../templates/setup-build-tools.yml + parameters: + host_cpu_arch: ${{ parameters.arch }} + + # Download the Python wheel produced by the packaging pipeline run that + # triggered this pipeline (or that was selected at queue time). 
+ - download: build + artifact: webgpu_plugin_python_win_${{ parameters.arch }} + displayName: 'Download Python wheel' + + - task: PowerShell@2 + displayName: 'Install and test Python package' + inputs: + targetType: inline + pwsh: true + script: | + $ErrorActionPreference = 'Stop' + + echo "creating test_venv" + python -m venv "$(Build.BinariesDirectory)\test_venv" + + echo "activating test_venv" + & "$(Build.BinariesDirectory)\test_venv\Scripts\Activate.ps1" + + echo "installing onnxruntime onnx numpy" + python -m pip install onnxruntime onnx numpy + if ($LASTEXITCODE -ne 0) { throw "pip install onnxruntime onnx numpy failed with exit code $LASTEXITCODE" } + + $wheelDir = "$(Pipeline.Workspace)\build\webgpu_plugin_python_win_${{ parameters.arch }}" + $wheel = (Get-ChildItem "$wheelDir\onnxruntime_ep_webgpu-*.whl")[0] + echo "installing ${wheel}" + python -m pip install $wheel.FullName + if ($LASTEXITCODE -ne 0) { throw "pip install wheel failed with exit code $LASTEXITCODE" } + + echo "running test_webgpu_plugin_ep.py" + python -u "$(Build.SourcesDirectory)\plugin-ep-webgpu\python\test\test_webgpu_plugin_ep.py" + if ($LASTEXITCODE -ne 0) { throw "test_webgpu_plugin_ep.py failed with exit code $LASTEXITCODE (0x$($LASTEXITCODE.ToString('X')))" } From 0a6f3134aa13b57fa988b13ce3eb36a8af0613a8 Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Wed, 22 Apr 2026 13:05:09 -0700 Subject: [PATCH 34/48] Convert plugin-webgpu-test-pipeline.yml to 1ES template Mirrors the structure of plugin-webgpu-pipeline.yml by extending v1/1ES.Official.PipelineTemplate.yml@1esPipelines. Sets sdl.sourceAnalysisPool explicitly since there is no top-level pool; stage templates pin their own pools. Omits codeSignValidation since this pipeline does not produce or publish binaries. 
--- .../plugin-webgpu-test-pipeline.yml | 63 +++++++++++++++---- 1 file changed, 51 insertions(+), 12 deletions(-) diff --git a/tools/ci_build/github/azure-pipelines/plugin-webgpu-test-pipeline.yml b/tools/ci_build/github/azure-pipelines/plugin-webgpu-test-pipeline.yml index 79e7702a66f54..1de88bffd1c05 100644 --- a/tools/ci_build/github/azure-pipelines/plugin-webgpu-test-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/plugin-webgpu-test-pipeline.yml @@ -17,6 +17,11 @@ resources: branches: include: - main + repositories: + - repository: 1esPipelines + type: git + name: 1ESPipelineTemplates/1ESPipelineTemplates + ref: refs/tags/release parameters: - name: test_windows_x64 @@ -37,17 +42,51 @@ parameters: type: boolean default: true -stages: - # Windows x64 - - ${{ if eq(parameters.test_windows_x64, true) }}: - - template: stages/plugin-win-webgpu-test-stage.yml - parameters: - arch: 'x64' +extends: + # The pipeline extends the 1ES PT which will inject SDL and compliance + # tasks. Uses "Official" to stay consistent with the companion + # WebGPU plugin packaging pipeline. + template: v1/1ES.Official.PipelineTemplate.yml@1esPipelines + parameters: + settings: + networkIsolationPolicy: Permissive + sdl: + # No top-level `pool:` is declared for this pipeline (each stage + # template pins its own pool), so source analysis needs an + # explicit pool. 
+ sourceAnalysisPool: + name: onnxruntime-Win-CPU-VS2022-Latest + os: windows + componentgovernance: + ignoreDirectories: '$(Build.Repository.LocalPath)/cmake/external/emsdk/upstream/emscripten/tests,$(Build.Repository.LocalPath)/cmake/external/onnx/third_party/benchmark,$(Build.Repository.LocalPath)/cmake/external/onnx/third_party/pybind11,$(Build.Repository.LocalPath)/cmake/external/onnx/third_party/pybind11/tests,$(Build.Repository.LocalPath)/cmake/external/onnxruntime-extensions,$(Build.Repository.LocalPath)/js/react_native/e2e/node_modules,$(Build.Repository.LocalPath)/js/node_modules,$(Build.Repository.LocalPath)/onnxruntime-inference-examples,$(Build.SourcesDirectory)/cmake/external/emsdk/upstream/emscripten/tests,$(Build.SourcesDirectory)/cmake/external/onnx/third_party/benchmark,$(Build.SourcesDirectory)/cmake/external/onnx/third_party/pybind11,$(Build.SourcesDirectory)/cmake/external/onnx/third_party/pybind11/tests,$(Build.SourcesDirectory)/cmake/external/onnxruntime-extensions,$(Build.SourcesDirectory)/js/react_native/e2e/node_modules,$(Build.SourcesDirectory)/js/node_modules,$(Build.SourcesDirectory)/onnxruntime-inference-examples,$(Build.BinariesDirectory)' + alertWarningLevel: High + failOnAlert: false + verbosity: Normal + timeout: 3600 + tsa: + enabled: true + # codeSignValidation is intentionally omitted: this pipeline does + # not produce or publish binaries. The wheels it consumes were + # already signed-and-validated by the packaging pipeline. 
+ policheck: + enabled: true + exclusionsFile: '$(Build.SourcesDirectory)\tools\ci_build\policheck_exclusions.xml' + codeql: + compiled: + enabled: false + justificationForDisabling: 'CodeQL is taking nearly 6 hours resulting in timeouts in our production pipelines' - # Linux x64 - - ${{ if eq(parameters.test_linux_x64, true) }}: - - template: stages/plugin-linux-webgpu-test-stage.yml + stages: + # Windows x64 + - ${{ if eq(parameters.test_windows_x64, true) }}: + - template: stages/plugin-win-webgpu-test-stage.yml + parameters: + arch: 'x64' - # macOS ARM64 - - ${{ if eq(parameters.test_macos_arm64, true) }}: - - template: stages/plugin-mac-webgpu-test-stage.yml + # Linux x64 + - ${{ if eq(parameters.test_linux_x64, true) }}: + - template: stages/plugin-linux-webgpu-test-stage.yml + + # macOS ARM64 + - ${{ if eq(parameters.test_macos_arm64, true) }}: + - template: stages/plugin-mac-webgpu-test-stage.yml From eea7db461e1bedd5a3eb86804f1e2e8716c65a45 Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Wed, 22 Apr 2026 14:14:08 -0700 Subject: [PATCH 35/48] Replace Mesa lavapipe with SwiftShader for WebGPU plugin EP test image The AlmaLinux 8 based test image ships an old Mesa lavapipe that returns VK_ERROR_INCOMPATIBLE_DRIVER when Dawn requests a Vulkan 1.3 instance, causing the WebGPU plugin EP Python test to fail with 'Found no drivers!' on hosted CI agents. Build SwiftShader (Google's software Vulkan ICD, used by Dawn for headless CI) from source in a multi-stage Dockerfile and install it to /opt/swiftshader. Pin to the commit SHA referenced by Dawn's DEPS. Update the test stage to point VK_ICD_FILENAMES / VK_DRIVER_FILES at the SwiftShader ICD instead of lavapipe. Verified locally: vulkaninfo --summary reports SwiftShader Device (DRIVER_ID_GOOGLE_SWIFTSHADER) with Vulkan 1.3. 
--- .../stages/plugin-linux-webgpu-test-stage.yml | 29 ++++---- .../inference/x86_64/python/webgpu/Dockerfile | 69 ++++++++++++++++--- 2 files changed, 76 insertions(+), 22 deletions(-) diff --git a/tools/ci_build/github/azure-pipelines/stages/plugin-linux-webgpu-test-stage.yml b/tools/ci_build/github/azure-pipelines/stages/plugin-linux-webgpu-test-stage.yml index 4c8544dcf3487..ec800ee4d5bff 100644 --- a/tools/ci_build/github/azure-pipelines/stages/plugin-linux-webgpu-test-stage.yml +++ b/tools/ci_build/github/azure-pipelines/stages/plugin-linux-webgpu-test-stage.yml @@ -10,11 +10,11 @@ parameters: stages: # Test stage. # -# This stage runs against a software Vulkan implementation (Mesa lavapipe) -# installed in the Docker image. It does not require a GPU agent, so it -# uses the standard CPU pool. The ICD selection is pinned at `docker run` -# time via VK_ICD_FILENAMES / VK_DRIVER_FILES (see below) so the image -# remains reusable for a potential future real-GPU test job. +# This stage runs against a software Vulkan implementation (SwiftShader, +# built from source in the Docker image). It does not require a GPU agent, +# so it uses the standard CPU pool. The ICD selection is pinned at +# `docker run` time via VK_ICD_FILENAMES / VK_DRIVER_FILES (see below) so +# the image remains reusable for a potential future real-GPU test job. - stage: Linux_plugin_webgpu_x64_Test dependsOn: [] jobs: @@ -53,25 +53,26 @@ stages: - script: | set -e -x - # Pin Vulkan to Mesa lavapipe (software Vulkan) so the test does not - # require a GPU agent. Keeping these env vars at `docker run` time - # (rather than baking them into the image) leaves the image reusable - # for a potential future real-GPU test job. - lavapipe_icd=/usr/share/vulkan/icd.d/lvp_icd.x86_64.json + # Pin Vulkan to SwiftShader (software Vulkan, built from source in + # the Docker image) so the test does not require a GPU agent. 
+ # Keeping these env vars at `docker run` time (rather than baking + # them into the image) leaves the image reusable for a potential + # future real-GPU test job. + swiftshader_icd=/opt/swiftshader/vk_swiftshader_icd.json docker run --rm \ --volume "$(Build.SourcesDirectory):/onnxruntime_src" \ --volume "$(Build.BinariesDirectory):/build" \ --env "PIP_INDEX_URL=${PIP_INDEX_URL}" \ - --env "VK_ICD_FILENAMES=${lavapipe_icd}" \ - --env "VK_DRIVER_FILES=${lavapipe_icd}" \ + --env "VK_ICD_FILENAMES=${swiftshader_icd}" \ + --env "VK_DRIVER_FILES=${swiftshader_icd}" \ onnxruntimewebgpuplugin \ /bin/bash -c " set -e -x # --- DIAGNOSTICS (remove once WebGPU EP test is stable) --- echo '=== Vulkan ICD configuration ===' echo \"VK_ICD_FILENAMES=\${VK_ICD_FILENAMES:-}\" - ls -la /usr/share/vulkan/icd.d/ 2>/dev/null || echo ' (no /usr/share/vulkan/icd.d/)' - echo '=== vulkaninfo --summary (lavapipe expected) ===' + ls -la /opt/swiftshader/ 2>/dev/null || echo ' (no /opt/swiftshader/)' + echo '=== vulkaninfo --summary (SwiftShader expected) ===' VK_LOADER_DEBUG=error,warn vulkaninfo --summary 2>&1 | head -60 || true echo '=== END DIAGNOSTICS ===' # --- END DIAGNOSTICS --- diff --git a/tools/ci_build/github/linux/docker/inference/x86_64/python/webgpu/Dockerfile b/tools/ci_build/github/linux/docker/inference/x86_64/python/webgpu/Dockerfile index 2bf475f43e510..6ecaa632489da 100644 --- a/tools/ci_build/github/linux/docker/inference/x86_64/python/webgpu/Dockerfile +++ b/tools/ci_build/github/linux/docker/inference/x86_64/python/webgpu/Dockerfile @@ -1,18 +1,71 @@ ARG BASEIMAGE=onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cpu_x64_almalinux8_gcc14:20251017.1 + +# --------------------------------------------------------------------------- +# Builder stage: build SwiftShader (Google's software Vulkan ICD) from source. +# +# Why SwiftShader instead of Mesa lavapipe? 
+# The AlmaLinux 8 base ships an old Mesa lavapipe that rejects Dawn's +# requested Vulkan apiVersion with VK_ERROR_INCOMPATIBLE_DRIVER. SwiftShader +# is maintained alongside Dawn for headless CI and is self-contained (one +# .so + ICD JSON, no Mesa/DRM dependency). +# +# SwiftShader has no release tags; pin to the commit SHA that Dawn's DEPS +# currently references. Bump this when Dawn bumps its SwiftShader revision. +# --------------------------------------------------------------------------- +FROM $BASEIMAGE AS swiftshader_builder + +ARG SWIFTSHADER_COMMIT=b7b7fd22e5f28079b92412f47f6da4df43e4cd37 + +RUN dnf install -y git ninja-build && dnf clean all + +RUN git -c advice.detachedHead=false init /tmp/swiftshader \ + && cd /tmp/swiftshader \ + && git remote add origin https://swiftshader.googlesource.com/SwiftShader \ + && git fetch --depth 1 origin "${SWIFTSHADER_COMMIT}" \ + && git checkout FETCH_HEAD \ + && git submodule update --init --recursive --depth 1 + +RUN cmake -S /tmp/swiftshader -B /tmp/swiftshader/build -G Ninja \ + -DCMAKE_BUILD_TYPE=Release \ + -DCMAKE_POLICY_VERSION_MINIMUM=3.5 \ + -DSWIFTSHADER_BUILD_TESTS=OFF \ + -DSWIFTSHADER_BUILD_PVR=OFF \ + -DSWIFTSHADER_WARNINGS_AS_ERRORS=OFF \ + && cmake --build /tmp/swiftshader/build --target vk_swiftshader + +# Stage the artifacts + rewrite the ICD JSON's library_path to an absolute +# path so the Vulkan loader can find the .so from any working directory. +RUN mkdir -p /opt/swiftshader \ + && cp /tmp/swiftshader/build/Linux/libvk_swiftshader.so /opt/swiftshader/ \ + && python3 <<'EOF' +import json +src = '/tmp/swiftshader/build/Linux/vk_swiftshader_icd.json' +dst = '/opt/swiftshader/vk_swiftshader_icd.json' +with open(src) as f: + icd = json.load(f) +icd['ICD']['library_path'] = '/opt/swiftshader/libvk_swiftshader.so' +with open(dst, 'w') as f: + json.dump(icd, f, indent=2) +EOF + +# --------------------------------------------------------------------------- +# Runtime stage: final test image. 
+# --------------------------------------------------------------------------- FROM $BASEIMAGE ADD scripts /tmp/scripts RUN cd /tmp/scripts && /tmp/scripts/install_centos.sh && rm -rf /tmp/scripts -# Install the Vulkan loader plus Mesa's lavapipe ICD (a software/CPU -# Vulkan implementation based on LLVMpipe). This lets Dawn/WebGPU run on -# CI agents that do not expose a GPU to the container. Callers that want -# to pin Vulkan to lavapipe should set VK_ICD_FILENAMES / VK_DRIVER_FILES -# at `docker run` time (see plugin-linux-webgpu-stage.yml). +# Vulkan loader + vulkaninfo for diagnostics. The SwiftShader ICD itself is +# copied from the builder stage below. Callers pin the driver at `docker run` +# time via VK_ICD_FILENAMES / VK_DRIVER_FILES (see +# plugin-linux-webgpu-test-stage.yml) so this image stays reusable if we +# ever add a real-GPU variant. # -# vulkan-tools is included for diagnostic purposes (vulkaninfo). It can -# be removed once the WebGPU EP test is stable. -RUN dnf install -y vulkan-loader mesa-vulkan-drivers vulkan-tools && dnf clean all +# vulkan-tools can be dropped once the WebGPU EP test is stable. 
+RUN dnf install -y vulkan-loader vulkan-tools && dnf clean all + +COPY --from=swiftshader_builder /opt/swiftshader /opt/swiftshader ARG BUILD_UID=1001 ARG BUILD_USER=onnxruntimedev From 1c47acf315860e941d317efb0dcc998c12c6dd51 Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Wed, 22 Apr 2026 14:20:53 -0700 Subject: [PATCH 36/48] TESTING - put Linux test stage back in packaging pipeline --- .../stages/plugin-linux-webgpu-stage.yml | 74 +++++++++++++++++++ 1 file changed, 74 insertions(+) diff --git a/tools/ci_build/github/azure-pipelines/stages/plugin-linux-webgpu-stage.yml b/tools/ci_build/github/azure-pipelines/stages/plugin-linux-webgpu-stage.yml index e2afa738b2ac7..a75599ef6e4ab 100644 --- a/tools/ci_build/github/azure-pipelines/stages/plugin-linux-webgpu-stage.yml +++ b/tools/ci_build/github/azure-pipelines/stages/plugin-linux-webgpu-stage.yml @@ -149,3 +149,77 @@ stages: -i onnxruntimewebgpuplugin \ -v "$(PluginPythonPackageVersion)" displayName: 'Build Python wheel' + +# TESTING - temporarily put the test stage here to see if it works. +- stage: Linux_plugin_webgpu_x64_Test + dependsOn: [Linux_plugin_webgpu_x64_Build] + jobs: + - job: Linux_plugin_webgpu_x64_Python_Test + timeoutInMinutes: 60 + workspace: + clean: all + pool: + name: ${{ parameters.machine_pool }} + os: linux + steps: + - checkout: self + clean: true + submodules: none + + - template: ../templates/setup-feeds-and-python-steps.yml + + - template: ../templates/get-docker-image-steps.yml + parameters: + Dockerfile: tools/ci_build/github/linux/docker/inference/x86_64/python/webgpu/Dockerfile + Context: tools/ci_build/github/linux/docker/inference/x86_64/python/webgpu + DockerBuildArgs: "--build-arg BASEIMAGE=${{ parameters.docker_base_image }} --build-arg BUILD_UID=$( id -u )" + Repository: onnxruntimewebgpuplugin + + # Download the Python wheel produced by the Python package job in this + # same pipeline run. 
(In the separate test pipeline this uses + # `download: build` against the pipelines resource; here the artifact + # comes from `current`.) + - download: current + artifact: webgpu_plugin_python_linux_x64 + displayName: 'Download Python wheel' + + - script: | + set -e -x + mkdir -p "$(Build.BinariesDirectory)/python_wheel" + cp -R "$(Pipeline.Workspace)/webgpu_plugin_python_linux_x64/"* "$(Build.BinariesDirectory)/python_wheel/" + displayName: 'Stage Python wheel for test container' + + - script: | + set -e -x + # Pin Vulkan to SwiftShader (software Vulkan, built from source in + # the Docker image) so the test does not require a GPU agent. + # Keeping these env vars at `docker run` time (rather than baking + # them into the image) leaves the image reusable for a potential + # future real-GPU test job. + swiftshader_icd=/opt/swiftshader/vk_swiftshader_icd.json + docker run --rm \ + --volume "$(Build.SourcesDirectory):/onnxruntime_src" \ + --volume "$(Build.BinariesDirectory):/build" \ + --env "PIP_INDEX_URL=${PIP_INDEX_URL}" \ + --env "VK_ICD_FILENAMES=${swiftshader_icd}" \ + --env "VK_DRIVER_FILES=${swiftshader_icd}" \ + onnxruntimewebgpuplugin \ + /bin/bash -c " + set -e -x + # --- DIAGNOSTICS (remove once WebGPU EP test is stable) --- + echo '=== Vulkan ICD configuration ===' + echo \"VK_ICD_FILENAMES=\${VK_ICD_FILENAMES:-}\" + ls -la /opt/swiftshader/ 2>/dev/null || echo ' (no /opt/swiftshader/)' + echo '=== vulkaninfo --summary (SwiftShader expected) ===' + VK_LOADER_DEBUG=error,warn vulkaninfo --summary 2>&1 | head -60 || true + echo '=== END DIAGNOSTICS ===' + # --- END DIAGNOSTICS --- + + python3 -m venv /build/test_venv + source /build/test_venv/bin/activate + python3 -m pip install onnxruntime onnx numpy + wheel=\$(find /build/python_wheel -name 'onnxruntime_ep_webgpu-*.whl' | head -1) + python3 -m pip install \"\$wheel\" + python3 -u /onnxruntime_src/plugin-ep-webgpu/python/test/test_webgpu_plugin_ep.py + " + displayName: 'Install and test Python 
package' From 10c7dee17b351bf68797c3331af95229d3a303f0 Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Fri, 24 Apr 2026 09:06:32 -0700 Subject: [PATCH 37/48] add pipeline variables to test pipeline --- .../azure-pipelines/plugin-webgpu-test-pipeline.yml | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/tools/ci_build/github/azure-pipelines/plugin-webgpu-test-pipeline.yml b/tools/ci_build/github/azure-pipelines/plugin-webgpu-test-pipeline.yml index 1de88bffd1c05..30813275ab3ad 100644 --- a/tools/ci_build/github/azure-pipelines/plugin-webgpu-test-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/plugin-webgpu-test-pipeline.yml @@ -1,5 +1,3 @@ -trigger: none - # This pipeline runs tests against artifacts produced by the WebGPU # plugin packaging pipeline. It is resource-triggered on successful # packaging runs and can also be queued manually against any prior @@ -9,6 +7,16 @@ trigger: none # configuration, test scripts) can be iterated on without rebuilding # Dawn/WebGPU from source. +trigger: none + +variables: +- name: DisableDockerDetector + value: true +- name: skipNugetSecurityAnalysis + value: true +- name: Codeql.SkipTaskAutoInjection + value: true + resources: pipelines: - pipeline: build From 6485e4da8c2fbe0341e30b854ef1e57283f7770f Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Fri, 24 Apr 2026 09:34:55 -0700 Subject: [PATCH 38/48] Revert "TESTING - put Linux test stage back in packaging pipeline" This reverts commit 1c47acf315860e941d317efb0dcc998c12c6dd51. 
--- .../stages/plugin-linux-webgpu-stage.yml | 74 ------------------- 1 file changed, 74 deletions(-) diff --git a/tools/ci_build/github/azure-pipelines/stages/plugin-linux-webgpu-stage.yml b/tools/ci_build/github/azure-pipelines/stages/plugin-linux-webgpu-stage.yml index a75599ef6e4ab..e2afa738b2ac7 100644 --- a/tools/ci_build/github/azure-pipelines/stages/plugin-linux-webgpu-stage.yml +++ b/tools/ci_build/github/azure-pipelines/stages/plugin-linux-webgpu-stage.yml @@ -149,77 +149,3 @@ stages: -i onnxruntimewebgpuplugin \ -v "$(PluginPythonPackageVersion)" displayName: 'Build Python wheel' - -# TESTING - temporarily put the test stage here to see if it works. -- stage: Linux_plugin_webgpu_x64_Test - dependsOn: [Linux_plugin_webgpu_x64_Build] - jobs: - - job: Linux_plugin_webgpu_x64_Python_Test - timeoutInMinutes: 60 - workspace: - clean: all - pool: - name: ${{ parameters.machine_pool }} - os: linux - steps: - - checkout: self - clean: true - submodules: none - - - template: ../templates/setup-feeds-and-python-steps.yml - - - template: ../templates/get-docker-image-steps.yml - parameters: - Dockerfile: tools/ci_build/github/linux/docker/inference/x86_64/python/webgpu/Dockerfile - Context: tools/ci_build/github/linux/docker/inference/x86_64/python/webgpu - DockerBuildArgs: "--build-arg BASEIMAGE=${{ parameters.docker_base_image }} --build-arg BUILD_UID=$( id -u )" - Repository: onnxruntimewebgpuplugin - - # Download the Python wheel produced by the Python package job in this - # same pipeline run. (In the separate test pipeline this uses - # `download: build` against the pipelines resource; here the artifact - # comes from `current`.) 
- - download: current - artifact: webgpu_plugin_python_linux_x64 - displayName: 'Download Python wheel' - - - script: | - set -e -x - mkdir -p "$(Build.BinariesDirectory)/python_wheel" - cp -R "$(Pipeline.Workspace)/webgpu_plugin_python_linux_x64/"* "$(Build.BinariesDirectory)/python_wheel/" - displayName: 'Stage Python wheel for test container' - - - script: | - set -e -x - # Pin Vulkan to SwiftShader (software Vulkan, built from source in - # the Docker image) so the test does not require a GPU agent. - # Keeping these env vars at `docker run` time (rather than baking - # them into the image) leaves the image reusable for a potential - # future real-GPU test job. - swiftshader_icd=/opt/swiftshader/vk_swiftshader_icd.json - docker run --rm \ - --volume "$(Build.SourcesDirectory):/onnxruntime_src" \ - --volume "$(Build.BinariesDirectory):/build" \ - --env "PIP_INDEX_URL=${PIP_INDEX_URL}" \ - --env "VK_ICD_FILENAMES=${swiftshader_icd}" \ - --env "VK_DRIVER_FILES=${swiftshader_icd}" \ - onnxruntimewebgpuplugin \ - /bin/bash -c " - set -e -x - # --- DIAGNOSTICS (remove once WebGPU EP test is stable) --- - echo '=== Vulkan ICD configuration ===' - echo \"VK_ICD_FILENAMES=\${VK_ICD_FILENAMES:-}\" - ls -la /opt/swiftshader/ 2>/dev/null || echo ' (no /opt/swiftshader/)' - echo '=== vulkaninfo --summary (SwiftShader expected) ===' - VK_LOADER_DEBUG=error,warn vulkaninfo --summary 2>&1 | head -60 || true - echo '=== END DIAGNOSTICS ===' - # --- END DIAGNOSTICS --- - - python3 -m venv /build/test_venv - source /build/test_venv/bin/activate - python3 -m pip install onnxruntime onnx numpy - wheel=\$(find /build/python_wheel -name 'onnxruntime_ep_webgpu-*.whl' | head -1) - python3 -m pip install \"\$wheel\" - python3 -u /onnxruntime_src/plugin-ep-webgpu/python/test/test_webgpu_plugin_ep.py - " - displayName: 'Install and test Python package' From ace7de50e709718ba8b3ee5f9368f74a0e01084e Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: 
Fri, 24 Apr 2026 10:45:04 -0700 Subject: [PATCH 39/48] remove/disable some pipeline debugging stuff --- .../python/test/test_webgpu_plugin_ep.py | 32 ++++++++++++------- .../stages/plugin-linux-webgpu-test-stage.yml | 10 +----- .../inference/x86_64/python/webgpu/Dockerfile | 17 ++++------ 3 files changed, 29 insertions(+), 30 deletions(-) diff --git a/plugin-ep-webgpu/python/test/test_webgpu_plugin_ep.py b/plugin-ep-webgpu/python/test/test_webgpu_plugin_ep.py index 5af66a655be8d..fa26c2419cc27 100644 --- a/plugin-ep-webgpu/python/test/test_webgpu_plugin_ep.py +++ b/plugin-ep-webgpu/python/test/test_webgpu_plugin_ep.py @@ -24,6 +24,15 @@ from onnx import TensorProto, helper +VERBOSE = os.environ.get("ORT_TEST_VERBOSE", "").strip().lower() in ("1", "true", "yes") + + +def debug_print(*args, **kwargs): + """Print only when ORT_TEST_VERBOSE is set to a truthy value.""" + if VERBOSE: + print(*args, **kwargs) + + def create_mul_model() -> str: """Create a simple Mul model and return the path to the saved .onnx file.""" X = helper.make_tensor_value_info("x", TensorProto.FLOAT, [2, 3]) @@ -60,9 +69,9 @@ def test_import_and_library_path(): """Test that the package imports and the library path is valid.""" import onnxruntime_ep_webgpu as webgpu_ep - print(f" Package location: {webgpu_ep.__file__}") + debug_print(f" Package location: {webgpu_ep.__file__}") pkg_dir = Path(webgpu_ep.__file__).parent - print(f" Package directory contents: {sorted(p.name for p in pkg_dir.iterdir())}") + debug_print(f" Package directory contents: {sorted(p.name for p in pkg_dir.iterdir())}") lib_path = webgpu_ep.get_library_path() assert Path(lib_path).is_file(), f"Library path does not exist: {lib_path}" @@ -86,15 +95,15 @@ def test_registration_and_inference(): registration_name = "webgpu_plugin_test" # Register the plugin EP - print(f" Registering library: {lib_path}") - print(f" Library file size: {Path(lib_path).stat().st_size} bytes") + debug_print(f" Registering library: {lib_path}") + 
debug_print(f" Library file size: {Path(lib_path).stat().st_size} bytes") ort.register_execution_provider_library(registration_name, lib_path) print(f"OK: Registered EP library as '{registration_name}'") try: # Discover devices all_devices = ort.get_ep_devices() - print(f" All devices: {[(d.ep_name, getattr(d, 'device_id', 'N/A')) for d in all_devices]}") + debug_print(f" All devices: {[(d.ep_name, getattr(d, 'device_id', 'N/A')) for d in all_devices]}") webgpu_devices = [d for d in all_devices if d.ep_name == ep_name] print(f"Found {len(webgpu_devices)} WebGPU device(s)") @@ -110,9 +119,9 @@ def test_registration_and_inference(): print("OK: Session options configured with WebGPU EP") model_path = create_mul_model() - print(f" Model path: {model_path}") + debug_print(f" Model path: {model_path}") sess = ort.InferenceSession(model_path, sess_options=sess_options) - print(f" Session providers: {sess.get_providers()}") + debug_print(f" Session providers: {sess.get_providers()}") print("OK: InferenceSession created") # Run inference @@ -137,11 +146,12 @@ def test_registration_and_inference(): def main(): print("=== WebGPU Plugin EP Python Package Test ===") - # Set verbose logging so ORT internals are visible in CI logs - ort.set_default_logger_severity(0) + if VERBOSE: + # Set verbose ORT logging so ORT internals are visible in CI logs + ort.set_default_logger_severity(0) - print("\n--- Environment ---") - print_environment_info() + print("\n--- Environment ---") + print_environment_info() print("\n--- Test 1: Import and library path ---") test_import_and_library_path() diff --git a/tools/ci_build/github/azure-pipelines/stages/plugin-linux-webgpu-test-stage.yml b/tools/ci_build/github/azure-pipelines/stages/plugin-linux-webgpu-test-stage.yml index ec800ee4d5bff..9ce494d4b3a36 100644 --- a/tools/ci_build/github/azure-pipelines/stages/plugin-linux-webgpu-test-stage.yml +++ b/tools/ci_build/github/azure-pipelines/stages/plugin-linux-webgpu-test-stage.yml @@ -65,18 +65,10 
@@ stages: --env "PIP_INDEX_URL=${PIP_INDEX_URL}" \ --env "VK_ICD_FILENAMES=${swiftshader_icd}" \ --env "VK_DRIVER_FILES=${swiftshader_icd}" \ + --env "ORT_TEST_VERBOSE=$(System.Debug)" \ onnxruntimewebgpuplugin \ /bin/bash -c " set -e -x - # --- DIAGNOSTICS (remove once WebGPU EP test is stable) --- - echo '=== Vulkan ICD configuration ===' - echo \"VK_ICD_FILENAMES=\${VK_ICD_FILENAMES:-}\" - ls -la /opt/swiftshader/ 2>/dev/null || echo ' (no /opt/swiftshader/)' - echo '=== vulkaninfo --summary (SwiftShader expected) ===' - VK_LOADER_DEBUG=error,warn vulkaninfo --summary 2>&1 | head -60 || true - echo '=== END DIAGNOSTICS ===' - # --- END DIAGNOSTICS --- - python3 -m venv /build/test_venv source /build/test_venv/bin/activate python3 -m pip install onnxruntime onnx numpy diff --git a/tools/ci_build/github/linux/docker/inference/x86_64/python/webgpu/Dockerfile b/tools/ci_build/github/linux/docker/inference/x86_64/python/webgpu/Dockerfile index 6ecaa632489da..526c129556395 100644 --- a/tools/ci_build/github/linux/docker/inference/x86_64/python/webgpu/Dockerfile +++ b/tools/ci_build/github/linux/docker/inference/x86_64/python/webgpu/Dockerfile @@ -9,8 +9,9 @@ ARG BASEIMAGE=onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/buil # is maintained alongside Dawn for headless CI and is self-contained (one # .so + ICD JSON, no Mesa/DRM dependency). # -# SwiftShader has no release tags; pin to the commit SHA that Dawn's DEPS -# currently references. Bump this when Dawn bumps its SwiftShader revision. +# SwiftShader has no release tags, so pin to a commit SHA. The ICD must +# advertise at least the Vulkan apiVersion Dawn requests; picking a SHA from +# Dawn's DEPS is a convenient way to get one known to satisfy that. 
# --------------------------------------------------------------------------- FROM $BASEIMAGE AS swiftshader_builder @@ -56,14 +57,10 @@ FROM $BASEIMAGE ADD scripts /tmp/scripts RUN cd /tmp/scripts && /tmp/scripts/install_centos.sh && rm -rf /tmp/scripts -# Vulkan loader + vulkaninfo for diagnostics. The SwiftShader ICD itself is -# copied from the builder stage below. Callers pin the driver at `docker run` -# time via VK_ICD_FILENAMES / VK_DRIVER_FILES (see -# plugin-linux-webgpu-test-stage.yml) so this image stays reusable if we -# ever add a real-GPU variant. -# -# vulkan-tools can be dropped once the WebGPU EP test is stable. -RUN dnf install -y vulkan-loader vulkan-tools && dnf clean all +# Vulkan loader. The SwiftShader ICD is copied from the builder stage +# below. Callers opt into SwiftShader at `docker run` time via +# VK_ICD_FILENAMES / VK_DRIVER_FILES (see plugin-linux-webgpu-test-stage.yml). +RUN dnf install -y vulkan-loader && dnf clean all COPY --from=swiftshader_builder /opt/swiftshader /opt/swiftshader From efebfce0f81c4eb9078987c6a48abce65a0db48d Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Fri, 24 Apr 2026 11:01:11 -0700 Subject: [PATCH 40/48] Plumb version_file parameter through plugin-webgpu pipeline templates Replace the hardcoded plugin-ep-webgpu/VERSION_NUMBER path in set-plugin-build-variables-step.yml with a required version_file parameter, threaded from the top-level pipeline (epVersionFile variable) through the packaging stage down to each platform stage. 
--- .../github/azure-pipelines/plugin-webgpu-pipeline.yml | 4 ++++ .../azure-pipelines/stages/plugin-linux-webgpu-stage.yml | 5 +++++ .../azure-pipelines/stages/plugin-mac-webgpu-stage.yml | 5 +++++ .../stages/plugin-webgpu-packaging-stage.yml | 7 +++++++ .../azure-pipelines/stages/plugin-win-webgpu-stage.yml | 5 +++++ .../templates/set-plugin-build-variables-step.yml | 7 +++++-- 6 files changed, 31 insertions(+), 2 deletions(-) diff --git a/tools/ci_build/github/azure-pipelines/plugin-webgpu-pipeline.yml b/tools/ci_build/github/azure-pipelines/plugin-webgpu-pipeline.yml index 6c3d74ae19878..aef0f150484d8 100644 --- a/tools/ci_build/github/azure-pipelines/plugin-webgpu-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/plugin-webgpu-pipeline.yml @@ -52,6 +52,9 @@ parameters: - MinSizeRel variables: + # Path (relative to the repository root) of the VERSION_NUMBER file used for plugin package versioning. + - name: epVersionFile + value: plugin-ep-webgpu/VERSION_NUMBER # Windows ARM64 build requires Windows x64 build to be enabled (ARM64 cross-compilation depends on x64 build artifacts) - name: invalidARM64Config value: ${{ and(eq(parameters.build_windows_arm64, true), eq(parameters.build_windows_x64, false)) }} @@ -119,4 +122,5 @@ extends: build_linux_x64: ${{ parameters.build_linux_x64 }} build_macos_arm64: ${{ parameters.build_macos_arm64 }} package_version: ${{ parameters.package_version }} + version_file: ${{ variables.epVersionFile }} cmake_build_type: ${{ parameters.cmake_build_type }} diff --git a/tools/ci_build/github/azure-pipelines/stages/plugin-linux-webgpu-stage.yml b/tools/ci_build/github/azure-pipelines/stages/plugin-linux-webgpu-stage.yml index e2afa738b2ac7..cb381fa90b00b 100644 --- a/tools/ci_build/github/azure-pipelines/stages/plugin-linux-webgpu-stage.yml +++ b/tools/ci_build/github/azure-pipelines/stages/plugin-linux-webgpu-stage.yml @@ -11,6 +11,9 @@ parameters: type: string default: dev +- name: version_file + type: string + - name: 
cmake_build_type type: string default: 'Release' @@ -52,6 +55,7 @@ stages: - template: ../templates/set-plugin-build-variables-step.yml parameters: package_version: ${{ parameters.package_version }} + version_file: ${{ parameters.version_file }} - template: ../templates/setup-feeds-and-python-steps.yml @@ -127,6 +131,7 @@ stages: - template: ../templates/set-plugin-build-variables-step.yml parameters: package_version: ${{ parameters.package_version }} + version_file: ${{ parameters.version_file }} - template: ../templates/setup-feeds-and-python-steps.yml diff --git a/tools/ci_build/github/azure-pipelines/stages/plugin-mac-webgpu-stage.yml b/tools/ci_build/github/azure-pipelines/stages/plugin-mac-webgpu-stage.yml index be6f997f15b55..eda45406f2480 100644 --- a/tools/ci_build/github/azure-pipelines/stages/plugin-mac-webgpu-stage.yml +++ b/tools/ci_build/github/azure-pipelines/stages/plugin-mac-webgpu-stage.yml @@ -3,6 +3,9 @@ parameters: type: string default: dev +- name: version_file + type: string + - name: cmake_build_type type: string default: 'Release' @@ -52,6 +55,7 @@ stages: - template: ../templates/set-plugin-build-variables-step.yml parameters: package_version: ${{ parameters.package_version }} + version_file: ${{ parameters.version_file }} - script: | set -e -x @@ -161,6 +165,7 @@ stages: - template: ../templates/set-plugin-build-variables-step.yml parameters: package_version: ${{ parameters.package_version }} + version_file: ${{ parameters.version_file }} - task: DownloadPipelineArtifact@2 displayName: 'Download plugin build artifacts' diff --git a/tools/ci_build/github/azure-pipelines/stages/plugin-webgpu-packaging-stage.yml b/tools/ci_build/github/azure-pipelines/stages/plugin-webgpu-packaging-stage.yml index 1864bb4016bb4..9db25f5727cc2 100644 --- a/tools/ci_build/github/azure-pipelines/stages/plugin-webgpu-packaging-stage.yml +++ b/tools/ci_build/github/azure-pipelines/stages/plugin-webgpu-packaging-stage.yml @@ -28,6 +28,9 @@ parameters: - release - 
RC +- name: version_file + type: string + - name: cmake_build_type type: string displayName: 'CMake build type' @@ -45,6 +48,7 @@ stages: parameters: arch: 'x64' package_version: ${{ parameters.package_version }} + version_file: ${{ parameters.version_file }} cmake_build_type: ${{ parameters.cmake_build_type }} # Windows ARM64 @@ -55,6 +59,7 @@ stages: parameters: arch: 'arm64' package_version: ${{ parameters.package_version }} + version_file: ${{ parameters.version_file }} cmake_build_type: ${{ parameters.cmake_build_type }} # Linux x64 @@ -62,6 +67,7 @@ stages: - template: plugin-linux-webgpu-stage.yml parameters: package_version: ${{ parameters.package_version }} + version_file: ${{ parameters.version_file }} cmake_build_type: ${{ parameters.cmake_build_type }} # macOS ARM64 @@ -69,4 +75,5 @@ stages: - template: plugin-mac-webgpu-stage.yml parameters: package_version: ${{ parameters.package_version }} + version_file: ${{ parameters.version_file }} cmake_build_type: ${{ parameters.cmake_build_type }} diff --git a/tools/ci_build/github/azure-pipelines/stages/plugin-win-webgpu-stage.yml b/tools/ci_build/github/azure-pipelines/stages/plugin-win-webgpu-stage.yml index 7eba2386b32f1..051104e115a01 100644 --- a/tools/ci_build/github/azure-pipelines/stages/plugin-win-webgpu-stage.yml +++ b/tools/ci_build/github/azure-pipelines/stages/plugin-win-webgpu-stage.yml @@ -9,6 +9,9 @@ parameters: type: string default: dev +- name: version_file + type: string + - name: cmake_build_type type: string default: 'Release' @@ -82,6 +85,7 @@ stages: - template: ../templates/set-plugin-build-variables-step.yml parameters: package_version: ${{ parameters.package_version }} + version_file: ${{ parameters.version_file }} - script: | python -m pip install -r "$(Build.SourcesDirectory)\tools\ci_build\github\windows\python\requirements.txt" @@ -289,6 +293,7 @@ stages: - template: ../templates/set-plugin-build-variables-step.yml parameters: package_version: ${{ parameters.package_version }} + 
version_file: ${{ parameters.version_file }} - task: DownloadPipelineArtifact@2 displayName: 'Download plugin build artifacts' diff --git a/tools/ci_build/github/azure-pipelines/templates/set-plugin-build-variables-step.yml b/tools/ci_build/github/azure-pipelines/templates/set-plugin-build-variables-step.yml index cbcf6ccbfa9d1..2fca91f000102 100644 --- a/tools/ci_build/github/azure-pipelines/templates/set-plugin-build-variables-step.yml +++ b/tools/ci_build/github/azure-pipelines/templates/set-plugin-build-variables-step.yml @@ -4,6 +4,8 @@ parameters: - name: package_version type: string +- name: version_file + type: string steps: # Set package version string @@ -18,11 +20,12 @@ steps: import sys package_version = "${{ parameters.package_version }}" + version_file_rel = "${{ parameters.version_file }}" src_root = os.environ.get("BUILD_SOURCESDIRECTORY", "") - version_file = os.path.join(src_root, "plugin-ep-webgpu", "VERSION_NUMBER") + version_file = os.path.join(src_root, version_file_rel) if not os.path.isfile(version_file): - print("##vso[task.logissue type=error]Cannot find plugin-ep-webgpu/VERSION_NUMBER at: {}".format(version_file)) + print("##vso[task.logissue type=error]Cannot find version number file at: {}".format(version_file)) sys.exit(1) with open(version_file, "r") as f: From 4e4f61b82aefbef9832c3cac3f121b6eee2523d3 Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Fri, 24 Apr 2026 11:05:33 -0700 Subject: [PATCH 41/48] document parameters and limit package_version values in set-plugin-build-variables-step.yml --- .../templates/set-plugin-build-variables-step.yml | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tools/ci_build/github/azure-pipelines/templates/set-plugin-build-variables-step.yml b/tools/ci_build/github/azure-pipelines/templates/set-plugin-build-variables-step.yml index 2fca91f000102..00e341e81e531 100644 --- 
a/tools/ci_build/github/azure-pipelines/templates/set-plugin-build-variables-step.yml +++ b/tools/ci_build/github/azure-pipelines/templates/set-plugin-build-variables-step.yml @@ -2,8 +2,18 @@ # variable based on the build type (nightly, official, or dev). parameters: +# The package version type: 'release', 'RC', or 'dev'. Controls how the final version +# string is derived from the contents of the version_file. - name: package_version type: string + values: + - release + - RC + - dev + +# Path, relative to the repository root, of the file containing the base version number +# (e.g. "plugin-ep-webgpu/VERSION_NUMBER"). The file should contain a single semver-like +# version string (e.g. "1.2.3"). - name: version_file type: string From a78e3a868565fb30a64e9eef5c2fcad6a2fe2846 Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Fri, 24 Apr 2026 11:28:34 -0700 Subject: [PATCH 42/48] Add top-level plugin-ep-webgpu README and note ORT_TEST_VERBOSE in python README --- plugin-ep-webgpu/README.md | 50 +++++++++++++++++++++++++++++++ plugin-ep-webgpu/python/README.md | 12 ++++---- 2 files changed, 57 insertions(+), 5 deletions(-) create mode 100644 plugin-ep-webgpu/README.md diff --git a/plugin-ep-webgpu/README.md b/plugin-ep-webgpu/README.md new file mode 100644 index 0000000000000..6ee99f72b3ed2 --- /dev/null +++ b/plugin-ep-webgpu/README.md @@ -0,0 +1,50 @@ +# WebGPU Plugin Execution Provider + +Packaging sources for the ONNX Runtime WebGPU plugin Execution Provider (EP), +distributed as a standalone artifact that plugs into an existing ONNX Runtime +installation rather than being built into the main `onnxruntime` binary. + +For more information about plugin EPs, see the documentation [here](https://onnxruntime.ai/docs/execution-providers/plugin-ep-libraries/). + +## Contents + +- [`VERSION_NUMBER`](VERSION_NUMBER) — Base plugin EP version consumed by the CI + pipeline. 
The pipeline derives the final package version (release, dev) from this + via [`tools/ci_build/github/azure-pipelines/templates/set-plugin-build-variables-step.yml`](../tools/ci_build/github/azure-pipelines/templates/set-plugin-build-variables-step.yml). +- [`python/`](python/) — Sources and build script for the `onnxruntime-ep-webgpu` + Python wheel. See [`python/README.md`](python/README.md) for build and test + instructions. + +## How it fits together + +The plugin EP is built as a shared library (`onnxruntime_providers_webgpu.{dll,so,dylib}`) +by the main ONNX Runtime build (`--use_webgpu shared_lib`). The resulting binaries are +then packaged into: + +- A Python wheel (`onnxruntime-ep-webgpu`), built from [`python/`](python/). +- A universal package published to the internal ORT-Nightly feed for Windows (x64 / + arm64), Linux x64, and macOS arm64. + +Packaging is driven by the `WebGPU Plugin EP Packaging Pipeline` +([`tools/ci_build/github/azure-pipelines/plugin-webgpu-pipeline.yml`](../tools/ci_build/github/azure-pipelines/plugin-webgpu-pipeline.yml)), +and post-build smoke tests run in the companion `WebGPU Plugin EP Test Pipeline` +([`tools/ci_build/github/azure-pipelines/plugin-webgpu-test-pipeline.yml`](../tools/ci_build/github/azure-pipelines/plugin-webgpu-test-pipeline.yml)). + +## Usage + +Once installed, the plugin EP is registered at runtime: + +```python +import onnxruntime as ort +import onnxruntime_ep_webgpu as webgpu_ep + +ort.register_execution_provider_library("webgpu", webgpu_ep.get_library_path()) + +devices = [d for d in ort.get_ep_devices() if d.ep_name == webgpu_ep.get_ep_name()] +sess_options = ort.SessionOptions() +sess_options.add_provider_for_devices(devices, {}) +session = ort.InferenceSession("model.onnx", sess_options=sess_options) +``` + +See [`python/onnxruntime_ep_webgpu/README.md`](python/onnxruntime_ep_webgpu/README.md) +for the user-facing package documentation (this README is bundled into the wheel). 
diff --git a/plugin-ep-webgpu/python/README.md b/plugin-ep-webgpu/python/README.md index 1535cd76c1c30..01be1dab9e138 100644 --- a/plugin-ep-webgpu/python/README.md +++ b/plugin-ep-webgpu/python/README.md @@ -1,4 +1,4 @@ -# WebGPU Plugin EP Python Package — Build & Test +# WebGPU Plugin EP Python Package This directory contains the packaging source for the `onnxruntime-ep-webgpu` Python package. @@ -46,10 +46,12 @@ pip install dist/onnxruntime_ep_webgpu-*.whl python test/test_webgpu_plugin_ep.py ``` -The test validates import, EP registration, device discovery, and inference (requires WebGPU-capable hardware for the inference portion). +The test validates import, EP registration, device discovery, and inference (requires +WebGPU-capable hardware for the inference portion). Set the environment variable +`ORT_TEST_VERBOSE=1` to print additional diagnostic information (environment, +available providers, discovered devices, etc.). ## Versioning -The package version is derived from `plugin-ep-webgpu/VERSION_NUMBER` by the CI pipeline (`set-plugin-build-variables-step.yml`), which produces a PEP 440 version string: -- **Release**: `X.Y.Z` -- **Dev**: `X.Y.Z.devYYYYMMDD` +The package version is derived from `plugin-ep-webgpu/VERSION_NUMBER` by the packaging pipeline, which produces a +PEP 440 version string. 
From 6ca182cffb8494bc6fbdef39a0d6deec46285e37 Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Fri, 24 Apr 2026 13:34:56 -0700 Subject: [PATCH 43/48] Add onnxruntime>=1.24.4 runtime dependency to onnxruntime-ep-webgpu wheel --- plugin-ep-webgpu/README.md | 29 ++++++++++++-------------- plugin-ep-webgpu/python/README.md | 17 ++++++++------- plugin-ep-webgpu/python/pyproject.toml | 3 +++ 3 files changed, 25 insertions(+), 24 deletions(-) diff --git a/plugin-ep-webgpu/README.md b/plugin-ep-webgpu/README.md index 6ee99f72b3ed2..dd874f8af1c3b 100644 --- a/plugin-ep-webgpu/README.md +++ b/plugin-ep-webgpu/README.md @@ -1,29 +1,26 @@ # WebGPU Plugin Execution Provider -Packaging sources for the ONNX Runtime WebGPU plugin Execution Provider (EP), -distributed as a standalone artifact that plugs into an existing ONNX Runtime -installation rather than being built into the main `onnxruntime` binary. +Packaging sources for the ONNX Runtime WebGPU plugin Execution Provider (EP), distributed as a standalone artifact +that plugs into an existing ONNX Runtime installation rather than being built into the main `onnxruntime` binary. For more information about plugin EPs, see the documentation [here](https://onnxruntime.ai/docs/execution-providers/plugin-ep-libraries/). ## Contents -- [`VERSION_NUMBER`](VERSION_NUMBER) — Base plugin EP version consumed by the CI - pipeline. The pipeline derives the final package version (release, dev) from this - via [`tools/ci_build/github/azure-pipelines/templates/set-plugin-build-variables-step.yml`](../tools/ci_build/github/azure-pipelines/templates/set-plugin-build-variables-step.yml). -- [`python/`](python/) — Sources and build script for the `onnxruntime-ep-webgpu` - Python wheel. See [`python/README.md`](python/README.md) for build and test - instructions. +- [`VERSION_NUMBER`](VERSION_NUMBER) — Base plugin EP version consumed by the CI pipeline. 
The pipeline derives the + final package version (release, dev) from this via + [`tools/ci_build/github/azure-pipelines/templates/set-plugin-build-variables-step.yml`](../tools/ci_build/github/azure-pipelines/templates/set-plugin-build-variables-step.yml). +- [`python/`](python/) — Sources and build script for the `onnxruntime-ep-webgpu` Python wheel. See + [`python/README.md`](python/README.md) for build and test instructions. ## How it fits together -The plugin EP is built as a shared library (`onnxruntime_providers_webgpu.{dll,so,dylib}`) -by the main ONNX Runtime build (`--use_webgpu shared_lib`). The resulting binaries are -then packaged into: +The plugin EP is built as a shared library (`onnxruntime_providers_webgpu.{dll,so,dylib}`) by the main ONNX Runtime +build (`--use_webgpu shared_lib`). The resulting binaries are then packaged into: - A Python wheel (`onnxruntime-ep-webgpu`), built from [`python/`](python/). -- A universal package published to the internal ORT-Nightly feed for Windows (x64 / - arm64), Linux x64, and macOS arm64. +- A universal package published to the internal ORT-Nightly feed for Windows (x64 / arm64), Linux x64, and macOS + arm64. Packaging is driven by the `WebGPU Plugin EP Packaging Pipeline` ([`tools/ci_build/github/azure-pipelines/plugin-webgpu-pipeline.yml`](../tools/ci_build/github/azure-pipelines/plugin-webgpu-pipeline.yml)), @@ -46,5 +43,5 @@ sess_options.add_provider_for_devices(devices, {}) session = ort.InferenceSession("model.onnx", sess_options=sess_options) ``` -See [`python/onnxruntime_ep_webgpu/README.md`](python/onnxruntime_ep_webgpu/README.md) -for the user-facing package documentation (this README is bundled into the wheel). +See [`python/onnxruntime_ep_webgpu/README.md`](python/onnxruntime_ep_webgpu/README.md) for the user-facing package +documentation (this README is bundled into the wheel). 
diff --git a/plugin-ep-webgpu/python/README.md b/plugin-ep-webgpu/python/README.md index 01be1dab9e138..efca2f1ee7678 100644 --- a/plugin-ep-webgpu/python/README.md +++ b/plugin-ep-webgpu/python/README.md @@ -31,8 +31,7 @@ python build_wheel.py \ --output_dir ./dist ``` -The script combines the pre-built plugin EP binaries with the package source to -produce a platform-specific wheel. +The script combines the pre-built plugin EP binaries with the package source to produce a platform-specific wheel. ## Testing @@ -41,15 +40,17 @@ Install the wheel and dependencies in a clean environment, then run the smoke te ```bash python -m venv test_venv source test_venv/bin/activate # or test_venv\Scripts\Activate.ps1 on Windows -pip install onnxruntime onnx numpy -pip install dist/onnxruntime_ep_webgpu-*.whl +pip install onnx numpy +pip install dist/onnxruntime_ep_webgpu-*.whl # pulls in onnxruntime>=1.24.4 python test/test_webgpu_plugin_ep.py ``` -The test validates import, EP registration, device discovery, and inference (requires -WebGPU-capable hardware for the inference portion). Set the environment variable -`ORT_TEST_VERBOSE=1` to print additional diagnostic information (environment, -available providers, discovered devices, etc.). +The wheel declares a runtime dependency on the minimum compatible `onnxruntime` package, so pip will install (or +verify) a compatible core runtime automatically. + +The test validates import, EP registration, device discovery, and inference (requires WebGPU-capable hardware for the +inference portion). Set the environment variable `ORT_TEST_VERBOSE=1` to print additional diagnostic information +(environment, available providers, discovered devices, etc.). 
## Versioning diff --git a/plugin-ep-webgpu/python/pyproject.toml b/plugin-ep-webgpu/python/pyproject.toml index d1475e624d45e..98fd472c1b76f 100644 --- a/plugin-ep-webgpu/python/pyproject.toml +++ b/plugin-ep-webgpu/python/pyproject.toml @@ -9,6 +9,9 @@ description = "ONNX Runtime WebGPU Plugin Execution Provider" readme = "onnxruntime_ep_webgpu/README.md" license = {text = "MIT"} requires-python = ">=3.11" +dependencies = [ + "onnxruntime>=1.24.4", +] [tool.setuptools.packages.find] include = ["onnxruntime_ep_webgpu*"] From 6b6c57d51208270d728418329abd589ab41e56d1 Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Fri, 24 Apr 2026 13:41:43 -0700 Subject: [PATCH 44/48] add ORT_TEST_VERBOSE environment variable to Mac/Windows test jobs --- .../azure-pipelines/stages/plugin-mac-webgpu-test-stage.yml | 2 ++ .../azure-pipelines/stages/plugin-win-webgpu-test-stage.yml | 2 ++ 2 files changed, 4 insertions(+) diff --git a/tools/ci_build/github/azure-pipelines/stages/plugin-mac-webgpu-test-stage.yml b/tools/ci_build/github/azure-pipelines/stages/plugin-mac-webgpu-test-stage.yml index 3f0df195f2bdc..5ad4e170b2855 100644 --- a/tools/ci_build/github/azure-pipelines/stages/plugin-mac-webgpu-test-stage.yml +++ b/tools/ci_build/github/azure-pipelines/stages/plugin-mac-webgpu-test-stage.yml @@ -35,3 +35,5 @@ stages: python3 -m pip install "$wheel" python3 -u "$(Build.SourcesDirectory)/plugin-ep-webgpu/python/test/test_webgpu_plugin_ep.py" displayName: 'Install and test Python package' + env: + ORT_TEST_VERBOSE: $(System.Debug) diff --git a/tools/ci_build/github/azure-pipelines/stages/plugin-win-webgpu-test-stage.yml b/tools/ci_build/github/azure-pipelines/stages/plugin-win-webgpu-test-stage.yml index 54169b7b13029..6664f7716eefa 100644 --- a/tools/ci_build/github/azure-pipelines/stages/plugin-win-webgpu-test-stage.yml +++ b/tools/ci_build/github/azure-pipelines/stages/plugin-win-webgpu-test-stage.yml @@ -33,6 +33,8 @@ stages: - task: 
PowerShell@2 displayName: 'Install and test Python package' + env: + ORT_TEST_VERBOSE: $(System.Debug) inputs: targetType: inline pwsh: true From 7a91125880acef1a04260115104cf57c410618b5 Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Fri, 24 Apr 2026 16:59:41 -0700 Subject: [PATCH 45/48] update test pipeline trigger --- .../azure-pipelines/plugin-webgpu-test-pipeline.yml | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/tools/ci_build/github/azure-pipelines/plugin-webgpu-test-pipeline.yml b/tools/ci_build/github/azure-pipelines/plugin-webgpu-test-pipeline.yml index 30813275ab3ad..83b28a0d96228 100644 --- a/tools/ci_build/github/azure-pipelines/plugin-webgpu-test-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/plugin-webgpu-test-pipeline.yml @@ -7,8 +7,6 @@ # configuration, test scripts) can be iterated on without rebuilding # Dawn/WebGPU from source. -trigger: none - variables: - name: DisableDockerDetector value: true @@ -21,11 +19,7 @@ resources: pipelines: - pipeline: build source: 'WebGPU Plugin EP Packaging Pipeline' - trigger: - branches: - include: - - main - repositories: + trigger: true - repository: 1esPipelines type: git name: 1ESPipelineTemplates/1ESPipelineTemplates From 349cfaf58c96f63aec84d81c3eff7a81a71246cd Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Fri, 24 Apr 2026 17:03:54 -0700 Subject: [PATCH 46/48] add back CI trigger specification --- .../github/azure-pipelines/plugin-webgpu-test-pipeline.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/ci_build/github/azure-pipelines/plugin-webgpu-test-pipeline.yml b/tools/ci_build/github/azure-pipelines/plugin-webgpu-test-pipeline.yml index 83b28a0d96228..65cab8cf56397 100644 --- a/tools/ci_build/github/azure-pipelines/plugin-webgpu-test-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/plugin-webgpu-test-pipeline.yml @@ -7,6 +7,8 @@ # configuration, test scripts) 
can be iterated on without rebuilding # Dawn/WebGPU from source. +trigger: none + variables: - name: DisableDockerDetector value: true From 7c08b501e3b92f99b654d9bf1752be9d24b37203 Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Fri, 24 Apr 2026 17:26:56 -0700 Subject: [PATCH 47/48] lint issues --- plugin-ep-webgpu/python/build_wheel.py | 19 +++++++++++-------- .../python/onnxruntime_ep_webgpu/__init__.py | 6 +++++- .../python/test/test_webgpu_plugin_ep.py | 16 ++++++++-------- 3 files changed, 24 insertions(+), 17 deletions(-) diff --git a/plugin-ep-webgpu/python/build_wheel.py b/plugin-ep-webgpu/python/build_wheel.py index 27d55b8c95e2d..9eb07aa8c2d69 100644 --- a/plugin-ep-webgpu/python/build_wheel.py +++ b/plugin-ep-webgpu/python/build_wheel.py @@ -76,9 +76,13 @@ def build_wheel(source_dir: Path, wheel_dir: Path): """Build the wheel using pip.""" wheel_dir.mkdir(parents=True, exist_ok=True) cmd = [ - sys.executable, "-m", "pip", "wheel", + sys.executable, + "-m", + "pip", + "wheel", str(source_dir), - "--wheel-dir", str(wheel_dir), + "--wheel-dir", + str(wheel_dir), "--no-deps", "--no-build-isolation", ] @@ -133,12 +137,11 @@ def collect_wheels(wheel_dir: Path, output_dir: Path): def main(): parser = argparse.ArgumentParser(description="Build onnxruntime-ep-webgpu wheel") - parser.add_argument("--binary_dir", required=True, type=Path, - help="Directory containing the built plugin EP binaries") - parser.add_argument("--version", required=True, - help="Package version string (PEP 440 format)") - parser.add_argument("--output_dir", required=True, type=Path, - help="Directory to place the built wheel") + parser.add_argument( + "--binary_dir", required=True, type=Path, help="Directory containing the built plugin EP binaries" + ) + parser.add_argument("--version", required=True, help="Package version string (PEP 440 format)") + parser.add_argument("--output_dir", required=True, type=Path, help="Directory to place the built 
wheel") args = parser.parse_args() if not args.binary_dir.is_dir(): diff --git a/plugin-ep-webgpu/python/onnxruntime_ep_webgpu/__init__.py b/plugin-ep-webgpu/python/onnxruntime_ep_webgpu/__init__.py index 71e2dc3c15944..284269eb0356a 100644 --- a/plugin-ep-webgpu/python/onnxruntime_ep_webgpu/__init__.py +++ b/plugin-ep-webgpu/python/onnxruntime_ep_webgpu/__init__.py @@ -8,7 +8,11 @@ import pathlib -__all__ = ["get_library_path", "get_ep_name", "get_ep_names"] +__all__ = [ + "get_ep_name", + "get_ep_names", + "get_library_path", +] _module_dir = pathlib.Path(__file__).parent diff --git a/plugin-ep-webgpu/python/test/test_webgpu_plugin_ep.py b/plugin-ep-webgpu/python/test/test_webgpu_plugin_ep.py index fa26c2419cc27..33d75c7510d46 100644 --- a/plugin-ep-webgpu/python/test/test_webgpu_plugin_ep.py +++ b/plugin-ep-webgpu/python/test/test_webgpu_plugin_ep.py @@ -20,9 +20,9 @@ import numpy as np import onnx -import onnxruntime as ort from onnx import TensorProto, helper +import onnxruntime as ort VERBOSE = os.environ.get("ORT_TEST_VERBOSE", "").strip().lower() in ("1", "true", "yes") @@ -35,13 +35,13 @@ def debug_print(*args, **kwargs): def create_mul_model() -> str: """Create a simple Mul model and return the path to the saved .onnx file.""" - X = helper.make_tensor_value_info("x", TensorProto.FLOAT, [2, 3]) - Y = helper.make_tensor_value_info("y", TensorProto.FLOAT, [2, 3]) - Z = helper.make_tensor_value_info("z", TensorProto.FLOAT, [2, 3]) + x = helper.make_tensor_value_info("x", TensorProto.FLOAT, [2, 3]) + y = helper.make_tensor_value_info("y", TensorProto.FLOAT, [2, 3]) + z = helper.make_tensor_value_info("z", TensorProto.FLOAT, [2, 3]) mul_node = helper.make_node("Mul", inputs=["x", "y"], outputs=["z"]) - graph = helper.make_graph([mul_node], "mul_graph", [X, Y], [Z]) + graph = helper.make_graph([mul_node], "mul_graph", [x, y], [z]) model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) model.ir_version = 7 @@ -67,7 +67,7 @@ def 
print_environment_info(): def test_import_and_library_path(): """Test that the package imports and the library path is valid.""" - import onnxruntime_ep_webgpu as webgpu_ep + import onnxruntime_ep_webgpu as webgpu_ep # noqa: PLC0415 # `import` should be at the top-level of a file. debug_print(f" Package location: {webgpu_ep.__file__}") pkg_dir = Path(webgpu_ep.__file__).parent @@ -88,7 +88,7 @@ def test_import_and_library_path(): def test_registration_and_inference(): """Test EP registration, device discovery, and inference.""" - import onnxruntime_ep_webgpu as webgpu_ep + import onnxruntime_ep_webgpu as webgpu_ep # noqa: PLC0415 # `import` should be at the top-level of a file. lib_path = webgpu_ep.get_library_path() ep_name = webgpu_ep.get_ep_name() @@ -133,7 +133,7 @@ def test_registration_and_inference(): result = outputs[0] np.testing.assert_allclose(result, expected, rtol=1e-5, atol=1e-5) - print(f"OK: Inference result matches expected output") + print("OK: Inference result matches expected output") del sess print("OK: Session released") From 47a4889f6e041fee24286f84706e20d1e29e44b4 Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Mon, 27 Apr 2026 18:51:06 -0700 Subject: [PATCH 48/48] fix yaml --- .../github/azure-pipelines/plugin-webgpu-test-pipeline.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/ci_build/github/azure-pipelines/plugin-webgpu-test-pipeline.yml b/tools/ci_build/github/azure-pipelines/plugin-webgpu-test-pipeline.yml index 65cab8cf56397..c322437bf7c9f 100644 --- a/tools/ci_build/github/azure-pipelines/plugin-webgpu-test-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/plugin-webgpu-test-pipeline.yml @@ -22,6 +22,7 @@ resources: - pipeline: build source: 'WebGPU Plugin EP Packaging Pipeline' trigger: true + repositories: - repository: 1esPipelines type: git name: 1ESPipelineTemplates/1ESPipelineTemplates