diff --git a/.github/workflows/bandit.yml b/.github/workflows/bandit.yml
index 124046d258..c0eb8c1d55 100644
--- a/.github/workflows/bandit.yml
+++ b/.github/workflows/bandit.yml
@@ -12,6 +12,7 @@ permissions:
 jobs:
   bandit:
+    if: false
     name: Bandit
     strategy:
       matrix:
diff --git a/.github/workflows/benchmarks-reusable.yml b/.github/workflows/benchmarks-reusable.yml
index 07a76a9846..bfd6064ba1 100644
--- a/.github/workflows/benchmarks-reusable.yml
+++ b/.github/workflows/benchmarks-reusable.yml
@@ -219,14 +219,12 @@ jobs:
           --adapter ${{ matrix.adapter.str_name }}
           --compute-runtime ${{ inputs.compute_runtime_commit }}
           --build-igc
-          --compare baseline
           ${{ inputs.upload_report && '--output-html' || '' }}
-          ${{ inputs.pr_no != 0 && '--output-markdown' || '' }}
           ${{ inputs.bench_script_params }}
 
       - name: Print benchmark results
         run: |
-          cat ${{ github.workspace }}/ur-repo/benchmark_results.md || true
+          cat ${{ github.workspace }}/ur-repo/benchmark_results.md
 
       - name: Add comment to PR
         uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml
index d260227214..7de3926daf 100644
--- a/.github/workflows/benchmarks.yml
+++ b/.github/workflows/benchmarks.yml
@@ -24,7 +24,7 @@ on:
         type: number
         required: true
       bench_script_params:
-        description: Parameters passed to the script executing benchmark
+        description: Parameters passed to script executing benchmark
         type: string
         required: false
         default: ''
diff --git a/.github/workflows/build-hw-reusable.yml b/.github/workflows/build-hw-reusable.yml
new file mode 100644
index 0000000000..95242e7e93
--- /dev/null
+++ b/.github/workflows/build-hw-reusable.yml
@@ -0,0 +1,114 @@
+---
+name: Build - Adapters on HW - Reusable
+
+on:
+  workflow_call:
+    inputs:
+      adapter_name:
+        required: true
+        type: string
+      other_adapter_name:
+        required: false
+        type: string
+        default: ""
+      runner_name:
+        required: true
+        type: string
+      platform:
+        description: "Platform string, `UR_CTS_ADAPTER_PLATFORM` will be set to this."
+        required: false
+        type: string
+        default: ""
+      static_loader:
+        required: false
+        type: string
+        default: OFF
+      static_adapter:
+        required: false
+        type: string
+        default: OFF
+
+permissions:
+  contents: read
+
+env:
+  UR_LOG_CUDA: "level:error;flush:error"
+  UR_LOG_HIP: "level:error;flush:error"
+  UR_LOG_LEVEL_ZERO: "level:error;flush:error"
+  UR_LOG_NATIVE_CPU: "level:error;flush:error"
+  UR_LOG_OPENCL: "level:error;flush:error"
+
+jobs:
+  adapter-build-hw:
+    name: Build & CTS
+    if: github.repository == 'oneapi-src/unified-runtime' # run only on upstream; forks won't have the HW
+    strategy:
+      matrix:
+        adapter: [
+          {
+            name: "${{inputs.adapter_name}}",
+            other_name: "${{inputs.other_adapter_name}}",
+            platform: "${{inputs.platform}}",
+            static_Loader: "${{inputs.static_loader}}",
+            static_adapter: "${{inputs.static_adapter}}"
+          }
+        ]
+        build_type: [Release]
+        compiler: [{c: gcc, cxx: g++}]
+
+    runs-on: CUDA_E2E
+
+    steps:
+    - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
+
+    - name: Install pip packages
+      run: pip install -r third_party/requirements.txt
+
+    - name: Download DPC++
+      run: |
+        wget -O ${{github.workspace}}/dpcpp_compiler.tar.gz https://github.com/intel/llvm/releases/download/nightly-2024-12-12/sycl_linux.tar.gz
+        mkdir dpcpp_compiler
+        tar -xvf ${{github.workspace}}/dpcpp_compiler.tar.gz -C dpcpp_compiler
+
+    - name: Configure CMake
+      run: >
+        cmake
+        -B${{github.workspace}}/build
+        -DCMAKE_C_COMPILER=${{matrix.compiler.c}}
+        -DCMAKE_CXX_COMPILER=${{matrix.compiler.cxx}}
+        -DCMAKE_BUILD_TYPE=${{matrix.build_type}}
+        -DUR_ENABLE_TRACING=ON
+        -DUR_DEVELOPER_MODE=ON
+        -DUR_BUILD_TESTS=ON
+        -DUR_BUILD_ADAPTER_${{matrix.adapter.name}}=ON
+        -DUR_CONFORMANCE_TEST_LOADER=${{ matrix.adapter.other_name != '' && 'ON' || 'OFF' }}
+        ${{ matrix.adapter.other_name != '' && format('-DUR_BUILD_ADAPTER_{0}=ON', matrix.adapter.other_name) || '' }}
+        -DUR_STATIC_LOADER=${{matrix.adapter.static_Loader}}
+        -DUR_STATIC_ADAPTER_${{matrix.adapter.name}}=${{matrix.adapter.static_adapter}}
+        -DUR_DPCXX=${{github.workspace}}/dpcpp_compiler/bin/clang++
+        -DUR_SYCL_LIBRARY_DIR=${{github.workspace}}/dpcpp_compiler/lib
+        -DCMAKE_INSTALL_PREFIX=${{github.workspace}}/install
+        ${{ matrix.adapter.name == 'HIP' && '-DUR_CONFORMANCE_AMD_ARCH=gfx1030' || '' }}
+        ${{ matrix.adapter.name == 'HIP' && '-DUR_HIP_PLATFORM=AMD' || '' }}
+
+    - name: Build
+      # This is so that device binaries can find the sycl runtime library
+      run: cmake --build ${{github.workspace}}/build -j $(nproc)
+
+    - name: Install
+      # This is to check that the install command does not fail
+      run: cmake --install ${{github.workspace}}/build
+
+    - name: Test adapter specific
+      working-directory: ${{github.workspace}}/build
+      run: ctest -C ${{matrix.build_type}} --output-on-failure -L "adapter-specific" -E "memcheck" --timeout 180
+      # Don't run adapter-specific tests when building multiple adapters
+      if: ${{ matrix.adapter.other_name == '' }}
+
+    - name: Test adapters
+      working-directory: ${{github.workspace}}/build
+      run: env UR_CTS_ADAPTER_PLATFORM="${{matrix.adapter.platform}}" ctest -C ${{matrix.build_type}} --output-on-failure -L "conformance" --timeout 180
+
+    - name: Get information about platform
+      if: ${{ always() }}
+      run: .github/scripts/get_system_info.sh
diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml
index fdc5d0c0c0..77af22dfd4 100644
--- a/.github/workflows/codeql.yml
+++ b/.github/workflows/codeql.yml
@@ -11,6 +11,7 @@ permissions:
 jobs:
   analyze-ubuntu:
+    if: false
     name: Analyze on Ubuntu
     runs-on: ${{ github.repository_owner == 'oneapi-src' && 'intel-ubuntu-22.04' || 'ubuntu-latest' }}
     permissions:
diff --git a/.github/workflows/e2e.yml b/.github/workflows/e2e.yml
new file mode 100644
index 0000000000..d0b7040d83
--- /dev/null
+++ b/.github/workflows/e2e.yml
@@ -0,0 +1,36 @@
+name: SYCL E2E
+
+on: [push, pull_request]
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
+  cancel-in-progress: true
+
+permissions:
+  contents: read
+  pull-requests: write
+
+jobs:
+
+  e2e-level-zero:
+    name: Level Zero
+    permissions:
+      contents: read
+      pull-requests: write
+    uses: ./.github/workflows/e2e_level_zero.yml
+
+  e2e-opencl:
+    name: OpenCL
+    permissions:
+      contents: read
+      pull-requests: write
+    uses: ./.github/workflows/e2e_opencl.yml
+
+  # Causes hangs: https://github.com/oneapi-src/unified-runtime/issues/2398
+  #e2e-cuda:
+  #  name: CUDA
+  #  permissions:
+  #    contents: read
+  #    pull-requests: write
+  #  needs: [ubuntu-build, cuda]
+  #  uses: ./.github/workflows/e2e_cuda.yml
diff --git a/.github/workflows/e2e_core.yml b/.github/workflows/e2e_core.yml
new file mode 100644
index 0000000000..ff5e7bc17c
--- /dev/null
+++ b/.github/workflows/e2e_core.yml
@@ -0,0 +1,220 @@
+name: E2E build & run
+
+on:
+  # this workflow can only be triggered by other workflows,
+  # for example by: e2e_cuda.yml or e2e_opencl.yml
+  workflow_call:
+    # acceptable input from adapter-specific workflows
+    inputs:
+      name:
+        description: Adapter name
+        type: string
+        required: true
+      str_name:
+        description: Formatted adapter name
+        type: string
+        required: true
+      prefix:
+        description: Prefix for the cmake parameter
+        type: string
+        required: true
+      config:
+        description: Params for the SYCL configuration
+        type: string
+        required: true
+      unit:
+        description: Test unit (cpu/gpu)
+        type: string
+        required: true
+      runner_tag:
+        description: Tag defined for the runner
+        type: string
+        required: true
+      xfail:
+        description: Tests expected to fail (exported as LIT_XFAIL)
+        type: string
+        required: false
+      xfail_not:
+        description: Tests excluded from xfail (exported as LIT_XFAIL_NOT)
+        type: string
+        required: false
+      filter_out:
+        description: Tests to filter out completely
+        type: string
+        required: false
+      extra_lit_flags:
+        description: Additional llvm-lit flags to use
+        type: string
+        required: false
+
+permissions:
+  contents: read
+  pull-requests: write
+
+jobs:
+  changed-files:
+    if: false
+    name: Check for changed files
+    runs-on: ${{ github.repository_owner == 'oneapi-src' && 'intel-ubuntu-22.04' || 'ubuntu-latest' }}
+    outputs:
+      any_changed: ${{ steps.get-changed.outputs.any_changed }}
+    steps:
+    - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
+    - name: Get changed files
+      id: get-changed
+      uses: tj-actions/changed-files@d6babd6899969df1a11d14c368283ea4436bca78 # v44.5.2
+      with:
+        files: |
+          source/adapters/${{inputs.str_name}}/**
+          source/loader/**
+          .github/workflows/e2e*
+
+  e2e-build-hw:
+    # We want to run the job only if there are changes in the specific adapter
+    if: needs.changed-files.outputs.any_changed == 'true'
+    name: Build SYCL, UR, run E2E
+    needs: changed-files
+    permissions:
+      contents: read
+      pull-requests: write
+
+    # Allow failures, since SYCL tests and API may not be stable
+    continue-on-error: true
+    strategy:
+      matrix:
+        adapter: [
+          {name: "${{inputs.name}}",
+           str_name: "${{inputs.str_name}}",
+           prefix: "${{inputs.prefix}}",
+           config: "${{inputs.config}}",
+           unit: "${{inputs.unit}}",
+           extra_lit_flags: "${{inputs.extra_lit_flags}}"},
+        ]
+        build_type: [Release]
+        compiler: [{c: clang, cxx: clang++}]
+
+    runs-on: ${{inputs.runner_tag}}
+
+    steps:
+    # Workspace on self-hosted runners is not cleaned automatically.
+    # We have to delete the files created outside of using actions.
+    - name: Cleanup self-hosted workspace
+      if: always()
+      run: |
+        ls -la ./
+        rm -rf ./* || true
+
+    - name: Checkout UR
+      uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
+      with:
+        path: ur-repo
+
+    - name: Checkout SYCL
+      uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
+      with:
+        repository: intel/llvm
+        ref: refs/heads/sycl
+        path: sycl-repo
+
+    - name: Set CUDA env vars
+      if: matrix.adapter.name == 'CUDA'
+      run: |
+        echo "CUDA_LIB_PATH=/usr/local/cuda/lib64/stubs" >> $GITHUB_ENV
+        echo "LD_LIBRARY_PATH=/usr/local/cuda/compat/:/usr/local/cuda/lib64:$LD_LIBRARY_PATH" >> $GITHUB_ENV
+
+    - name: Configure SYCL
+      run: >
+        python3 sycl-repo/buildbot/configure.py
+        -t ${{matrix.build_type}}
+        -o ${{github.workspace}}/sycl_build
+        --cmake-gen "Ninja"
+        --ci-defaults ${{matrix.adapter.config}}
+        --cmake-opt="-DLLVM_INSTALL_UTILS=ON"
+        --cmake-opt="-DSYCL_PI_TESTS=OFF"
+        --cmake-opt="-DSYCL_UR_USE_FETCH_CONTENT=OFF"
+        --cmake-opt="-DSYCL_UR_SOURCE_DIR=${{github.workspace}}/ur-repo/"
+        --cmake-opt=-DCMAKE_C_COMPILER_LAUNCHER=ccache
+        --cmake-opt=-DCMAKE_CXX_COMPILER_LAUNCHER=ccache
+
+    - name: Build SYCL
+      run: cmake --build ${{github.workspace}}/sycl_build -j
+
+    - name: Set extra llvm-lit options
+      if: matrix.adapter.extra_lit_flags != ''
+      run: echo "LIT_OPTS=${{matrix.adapter.extra_lit_flags}}" >> $GITHUB_ENV
+
+    - name: Run check-sycl
+      # Remove after fixing SYCL test :: abi/layout_handler.cpp
+      # This issue does not affect further execution of e2e with UR.
+      continue-on-error: true
+      run: cmake --build ${{github.workspace}}/sycl_build --target check-sycl
+
+    - name: Set additional env vars
+      run: |
+        echo "${{github.workspace}}/sycl_build/bin" >> $GITHUB_PATH
+        echo "LD_LIBRARY_PATH=${{github.workspace}}/sycl_build/lib:$LD_LIBRARY_PATH" >> $GITHUB_ENV
+
+    # Running (newly built) sycl-ls sets up some extra variables
+    - name: Setup SYCL variables
+      run: |
+        which clang++ sycl-ls
+        SYCL_UR_TRACE=-1 sycl-ls
+
+    - name: Build e2e tests
+      run: >
+        cmake
+        -GNinja
+        -B ${{github.workspace}}/build-e2e/
+        -S ${{github.workspace}}/sycl-repo/sycl/test-e2e/
+        -DSYCL_TEST_E2E_TARGETS="${{matrix.adapter.prefix}}${{matrix.adapter.str_name}}:${{matrix.adapter.unit}}"
+        -DCMAKE_CXX_COMPILER="$(which clang++)"
+        -DLLVM_LIT="${{github.workspace}}/sycl-repo/llvm/utils/lit/lit.py"
+
+    - name: Set LIT_XFAIL
+      if: inputs.xfail != ''
+      run: echo "LIT_XFAIL=${{inputs.xfail}}" >> $GITHUB_ENV
+
+    - name: Set LIT_FILTER_OUT
+      if: inputs.filter_out != ''
+      run: echo "LIT_FILTER_OUT=${{inputs.filter_out}}" >> $GITHUB_ENV
+
+    - name: Set LIT_XFAIL_NOT
+      if: inputs.xfail_not != ''
+      run: echo "LIT_XFAIL_NOT=${{inputs.xfail_not}}" >> $GITHUB_ENV
+
+    # TODO: remove once intel/llvm lit tests can properly recognize the GPU
+    - name: Configure hardware platform feature for L0
+      if: matrix.adapter.name == 'L0'
+      run: |
+        sed -i '/import lit.llvm/i config.available_features.add("gpu-intel-pvc-1T")' build-e2e/lit.site.cfg.py
+        sed -i '/import lit.llvm/i config.available_features.add("gpu-intel-pvc")' build-e2e/lit.site.cfg.py
+
+    - name: Run e2e tests
+      id: tests
+      run: ninja -C build-e2e check-sycl-e2e || echo "e2e tests have failed. Ignoring failure."
+
+    - name: Get information about platform
+      if: ${{ always() }}
+      working-directory: ${{github.workspace}}/ur-repo
+      run: .github/scripts/get_system_info.sh
+
+    # FIXME: Requires pull-requests: write permission, but this is only granted
+    # on pull requests from forks when using the pull_request_target workflow
+    # trigger, not the pull_request trigger.
+    # - name: Add comment to PR
+    #   uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
+    #   if: ${{ always() }}
+    #   with:
+    #     script: |
+    #       const adapter = '${{ matrix.adapter.name }}';
+    #       const url = '${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}';
+    #       const test_status = '${{ steps.tests.outcome }}';
+    #       const job_status = '${{ job.status }}';
+    #       const body = `E2E ${adapter} build:\n${url}\nJob status: ${job_status}. Test status: ${test_status}`;
+
+    #       github.rest.issues.createComment({
+    #         issue_number: context.issue.number,
+    #         owner: context.repo.owner,
+    #         repo: context.repo.repo,
+    #         body: body
+    #       })
diff --git a/.github/workflows/e2e_cuda.yml b/.github/workflows/e2e_cuda.yml
new file mode 100644
index 0000000000..c2f1d969b8
--- /dev/null
+++ b/.github/workflows/e2e_cuda.yml
@@ -0,0 +1,24 @@
+name: E2E Cuda
+
+on:
+  workflow_call:
+
+permissions:
+  contents: read
+  pull-requests: write
+
+jobs:
+  e2e-build-hw:
+    if: github.repository == 'oneapi-src/unified-runtime' # run only on upstream; forks will not have the HW
+    name: Start e2e job
+    # use the core flow, run it with CUDA-specific parameters
+    uses: ./.github/workflows/e2e_core.yml
+    with:
+      name: "CUDA"
+      runner_tag: "CUDA_E2E"
+      str_name: "cuda"
+      prefix: "ext_oneapi_"
+      config: "--cuda"
+      unit: "gpu"
+      extra_lit_flags: "-sv --max-time=3600"
+      xfail: "Regression/device_num.cpp"
diff --git a/.github/workflows/e2e_level_zero.yml b/.github/workflows/e2e_level_zero.yml
new file mode 100644
index 0000000000..1fd814f271
--- /dev/null
+++ b/.github/workflows/e2e_level_zero.yml
@@ -0,0 +1,31 @@
+name: E2E Level Zero
+
+on:
+  workflow_call:
+
+permissions:
+  contents: read
+  pull-requests: write
+
+jobs:
+  e2e-build-hw:
+    if: github.repository == 'oneapi-src/unified-runtime' # run only on upstream; forks will not have the HW
+    name: Start e2e job
+    # use the core flow, run it with L0-specific parameters
+    uses: ./.github/workflows/e2e_core.yml
+    with:
+      name: "L0"
+      runner_tag: "L0_E2E"
+      str_name: "level_zero"
+      prefix: "ext_oneapi_"
+      config: ""
+      unit: "gpu"
+      # Failing tests
+      xfail: "InvokeSimd/Regression/call_vadd_1d_spill.cpp;InvokeSimd/Regression/ImplicitSubgroup/call_vadd_1d_spill.cpp;ESIMD/mask_expand_load.cpp;Matrix/joint_matrix_prefetch.cpp;Matrix/SPVCooperativeMatrix/joint_matrix_prefetch.cpp;Matrix/joint_matrix_bf16_fill_k_cache_prefetch.cpp;Matrix/SPVCooperativeMatrix/element_wise_ops.cpp;"
+      # Unexpectedly Passed Tests
+      xfail_not: ""
+      # Flaky tests
+      filter_out: "Basic/accessor/accessor.cpp|DeviceArchitecture/device_architecture_comparison_on_device_aot.cpp|Graph/Explicit/interop-level-zero-launch-kernel.cpp|Graph/RecordReplay/interop-level-zero-launch-kernel.cpp|syclcompat/launch/launch_policy_lmem.cpp"
+      # These runners by default spawn upwards of 260 workers.
+      # We also add a timeout in case some test hangs.
+      extra_lit_flags: "--param gpu-intel-pvc=True --param gpu-intel-pvc-1T=True -sv -j 100 --max-time=3600"
diff --git a/.github/workflows/e2e_opencl.yml b/.github/workflows/e2e_opencl.yml
new file mode 100644
index 0000000000..e4714b2434
--- /dev/null
+++ b/.github/workflows/e2e_opencl.yml
@@ -0,0 +1,24 @@
+name: E2E OpenCL
+
+on:
+  workflow_call:
+
+permissions:
+  contents: read
+  pull-requests: write
+
+jobs:
+  e2e-build-hw:
+    if: github.repository == 'oneapi-src/unified-runtime' # run only on upstream; forks will not have the HW
+    name: Start e2e job
+    # use the core flow, run it with OpenCL-specific parameters
+    uses: ./.github/workflows/e2e_core.yml
+    with:
+      name: "OPENCL"
+      runner_tag: "OPENCL"
+      str_name: "opencl"
+      prefix: ""
+      config: ""
+      unit: "cpu"
+      xfail: "AOT/double.cpp;AOT/half.cpp;AOT/reqd-sg-size.cpp;Basic/built-ins/marray_geometric.cpp;KernelCompiler/kernel_compiler_spirv.cpp;KernelCompiler/opencl_queries.cpp;NonUniformGroups/ballot_group.cpp;NonUniformGroups/ballot_group_algorithms.cpp;NonUniformGroups/fixed_size_group_algorithms.cpp;NonUniformGroups/opportunistic_group.cpp;NonUniformGroups/opportunistic_group_algorithms.cpp;NonUniformGroups/tangle_group.cpp;NonUniformGroups/tangle_group_algorithms.cpp"
+      extra_lit_flags: "-sv --max-time=3600"
diff --git a/.github/workflows/multi_device.yml b/.github/workflows/multi_device.yml
index 5334e86b87..2dd5d60352 100644
--- a/.github/workflows/multi_device.yml
+++ b/.github/workflows/multi_device.yml
@@ -13,7 +13,7 @@ permissions:
 jobs:
   examples:
     name: Multi Device testing
-    if: github.repository == 'oneapi-src/unified-runtime' # run only on upstream; forks won't have the HW
+    if: false
     strategy:
       matrix:
         adapter: [
diff --git a/.github/workflows/source-checks.yml b/.github/workflows/source-checks.yml
new file mode 100644
index 0000000000..8c43adf952
--- /dev/null
+++ b/.github/workflows/source-checks.yml
@@ -0,0 +1,66 @@
+on:
+  workflow_call:
+
+permissions:
+  contents: read
+
+jobs:
+  source-checks:
+    if: false
+    strategy:
+      matrix:
+        os: ['ubuntu-22.04', 'windows-2022']
+
+    runs-on: ${{matrix.os}}
+
+    steps:
+    - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
+
+    - uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0
+      with:
+        python-version: 3.9
+
+    - name: Install pip packages
+      run: pip install -r third_party/requirements.txt
+
+    - name: "[Lin] Install doxygen"
+      if: matrix.os == 'ubuntu-22.04'
+      run: |
+        sudo apt-get update
+        sudo apt-get install -y doxygen
+
+    - name: "[Win] Install doxygen"
+      if: matrix.os == 'windows-2022'
+      run: |
+        $WorkingDir = $PWD.Path
+        Invoke-WebRequest -Uri https://github.com/doxygen/doxygen/releases/download/Release_1_9_8/doxygen-1.9.8.windows.x64.bin.zip -OutFile "$WorkingDir\doxygen.zip"
+        Expand-Archive -Path "$WorkingDir\doxygen.zip"
+        Add-Content $env:GITHUB_PATH "$WorkingDir\doxygen"
+
+    - name: "[Lin] Install hwloc"
+      if: matrix.os == 'ubuntu-22.04'
+      run: .github/scripts/install_hwloc.sh
+
+    - name: "[Win] Install hwloc"
+      if: matrix.os == 'windows-2022'
+      run: vcpkg install hwloc:x64-windows
+
+    - name: Configure CMake
+      env:
+        VCPKG_PATH: "C:/vcpkg/packages/hwloc_x64-windows"
+      run: >
+        cmake
+        -B${{github.workspace}}/build
+        -DCMAKE_PREFIX_PATH="${{env.VCPKG_PATH}}"
+        -DUR_ENABLE_TRACING=OFF
+        -DCMAKE_BUILD_TYPE=Debug
+        -DUR_BUILD_TESTS=OFF
+        -DUR_FORMAT_CPP_STYLE=ON
+
+    # Verifying licenses on a single OS is enough
+    - name: Verify that each source file contains a license
+      if: matrix.os == 'ubuntu-22.04'
+      run: cmake --build ${{github.workspace}}/build --target verify-licenses
+
+    - name: Generate source from spec, check for uncommitted diff
+      run: cmake --build ${{github.workspace}}/build --target check-generated
diff --git a/.github/workflows/trivy.yml b/.github/workflows/trivy.yml
index c2ef1d47e7..ba7271180f 100644
--- a/.github/workflows/trivy.yml
+++ b/.github/workflows/trivy.yml
@@ -18,6 +18,7 @@ permissions:
 jobs:
   linux:
+    if: false
     name: Trivy
     runs-on: ${{ github.repository_owner == 'oneapi-src' && 'intel-ubuntu-22.04' || 'ubuntu-latest' }}
     permissions:
diff --git a/.github/workflows/unified-runtime.yml b/.github/workflows/unified-runtime.yml
new file mode 100644
index 0000000000..a454d77fb6
--- /dev/null
+++ b/.github/workflows/unified-runtime.yml
@@ -0,0 +1,18 @@
+name: Unified Runtime
+
+on: [push, pull_request]
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
+  cancel-in-progress: true
+
+permissions:
+  contents: read
+
+jobs:
+  cuda:
+    name: CUDA
+    uses: ./.github/workflows/build-hw-reusable.yml
+    with:
+      adapter_name: CUDA
+      runner_name: CUDA
diff --git a/test/adapters/cuda/context_tests.cpp b/test/adapters/cuda/context_tests.cpp
index 77d0b42cd0..4f4df4683c 100644
--- a/test/adapters/cuda/context_tests.cpp
+++ b/test/adapters/cuda/context_tests.cpp
@@ -43,37 +43,6 @@ TEST_P(cudaUrContextCreateTest, CreateWithChildThread) {
   callContextFromOtherThread.join();
 }
 
-TEST_P(cudaUrContextCreateTest, ActiveContext) {
-  uur::raii::Context context = nullptr;
-  ASSERT_SUCCESS(urContextCreate(1, &device, nullptr, context.ptr()));
-  ASSERT_NE(context, nullptr);
-
-  uur::raii::Queue queue = nullptr;
-  ur_queue_properties_t queue_props{UR_STRUCTURE_TYPE_QUEUE_PROPERTIES, nullptr,
-                                    0};
-  ASSERT_SUCCESS(urQueueCreate(context, device, &queue_props, queue.ptr()));
-  ASSERT_NE(queue, nullptr);
-
-  // check that the queue has the correct context
-  ASSERT_EQ(context, queue->getContext());
-
-  // create a buffer
-  uur::raii::Mem buffer = nullptr;
-  ASSERT_SUCCESS(urMemBufferCreate(context, UR_MEM_FLAG_READ_WRITE, 1024,
-                                   nullptr, buffer.ptr()));
-  ASSERT_NE(buffer, nullptr);
-
-  // check that the context is now the active CUDA context
-  CUcontext cudaCtx = nullptr;
-  ASSERT_SUCCESS_CUDA(cuCtxGetCurrent(&cudaCtx));
-  ASSERT_NE(cudaCtx, nullptr);
-
-  ur_native_handle_t native_context = 0;
-  ASSERT_SUCCESS(urContextGetNativeHandle(context, &native_context));
-  ASSERT_NE(reinterpret_cast<CUcontext>(native_context), nullptr);
-  ASSERT_EQ(cudaCtx, reinterpret_cast<CUcontext>(native_context));
-}
-
 TEST_P(cudaUrContextCreateTest, ContextLifetimeExisting) {
   // start by setting up a CUDA context on the thread
   CUcontext original;
diff --git a/test/adapters/cuda/memory_tests.cpp b/test/adapters/cuda/memory_tests.cpp
index 6839b0b95f..ac4bfefdd7 100644
--- a/test/adapters/cuda/memory_tests.cpp
+++ b/test/adapters/cuda/memory_tests.cpp
@@ -14,11 +14,12 @@ TEST_P(cudaMemoryTest, urMemBufferNoActiveContext) {
   constexpr size_t memSize = 1024u;
 
   CUcontext current = nullptr;
-  do {
+  ASSERT_SUCCESS_CUDA(cuCtxGetCurrent(&current));
+  while (current != nullptr) {
     CUcontext oldContext = nullptr;
     ASSERT_SUCCESS_CUDA(cuCtxPopCurrent(&oldContext));
     ASSERT_SUCCESS_CUDA(cuCtxGetCurrent(&current));
-  } while (current != nullptr);
+  }
 
   uur::raii::Mem mem;
   ASSERT_SUCCESS(urMemBufferCreate(context, UR_MEM_FLAG_READ_WRITE, memSize,
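Side note on the memory_tests.cpp hunk above: the old do/while popped a context before ever checking whether one was bound, while the new loop queries first, so cuCtxPopCurrent is never called on an empty context stack. A minimal standalone sketch of the same draining pattern follows; drainThreadContexts is a hypothetical helper written for illustration, not part of the patch, and it assumes the CUDA driver API has already been initialized.

#include <cassert>
#include <cuda.h>

// Pop every context bound to the calling thread. Checking before popping
// means cuCtxPopCurrent is never invoked with an empty context stack.
static void drainThreadContexts() {
  CUcontext current = nullptr;
  assert(cuCtxGetCurrent(&current) == CUDA_SUCCESS);
  while (current != nullptr) {
    CUcontext popped = nullptr;
    assert(cuCtxPopCurrent(&popped) == CUDA_SUCCESS);
    assert(cuCtxGetCurrent(&current) == CUDA_SUCCESS);
  }
}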
diff --git a/test/conformance/enqueue/helpers.h b/test/conformance/enqueue/helpers.h
index a4d127c4e5..fd8c428a52 100644
--- a/test/conformance/enqueue/helpers.h
+++ b/test/conformance/enqueue/helpers.h
@@ -203,7 +203,15 @@ struct urMultiQueueMultiDeviceTestWithParam
         urContextCreate(devices.size(), devices.data(), nullptr, &context));
 
     // Duplicate our devices until we hit the minimum size specified.
-    auto srcDevices = devices;
+    std::vector<ur_device_handle_t> srcDevices;
+    // If the test actually only wants one device duplicated a bunch of times
+    // we take devices[0] and discard any other devices that were discovered.
+    if (trueMultiDevice) {
+      srcDevices = devices;
+    } else {
+      srcDevices.push_back(devices[0]);
+      devices.clear();
+    }
     while (devices.size() < minDevices) {
       devices.insert(devices.end(), srcDevices.begin(), srcDevices.end());
     }
@@ -224,6 +232,7 @@ struct urMultiQueueMultiDeviceTestWithParam
 
   ur_context_handle_t context;
   std::vector<ur_queue_handle_t> queues;
+  bool trueMultiDevice = true;
 };
 
 } // namespace uur
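Side note on the helpers.h hunk above: the fixture now either round-robins over every discovered device or repeats devices[0] until minDevices entries exist. Below is a standalone restatement of that duplication logic, for review purposes only; expandDevices is a hypothetical free function, not part of the patch.

#include <cstddef>
#include <vector>

// Mirrors the fixture's loop: duplicate either the whole discovered set or
// just the first device until at least minDevices entries exist.
template <typename Device>
std::vector<Device> expandDevices(std::vector<Device> devices,
                                  std::size_t minDevices,
                                  bool trueMultiDevice) {
  std::vector<Device> srcDevices;
  if (trueMultiDevice) {
    srcDevices = devices; // use every real device
  } else {
    srcDevices.push_back(devices[0]); // one real device, reused
    devices.clear();
  }
  while (devices.size() < minDevices) {
    devices.insert(devices.end(), srcDevices.begin(), srcDevices.end());
  }
  return devices;
}

For example, one device with minDevices = 4 and trueMultiDevice = false yields four copies of that device, which is what the single-device increment tests below rely on.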
diff --git a/test/conformance/enqueue/urEnqueueKernelLaunch.cpp b/test/conformance/enqueue/urEnqueueKernelLaunch.cpp
index ef5c0228ed..3dd977d556 100644
--- a/test/conformance/enqueue/urEnqueueKernelLaunch.cpp
+++ b/test/conformance/enqueue/urEnqueueKernelLaunch.cpp
@@ -565,7 +565,7 @@ UUR_INSTANTIATE_PLATFORM_TEST_SUITE(urEnqueueKernelLaunchMultiDeviceTest);
 // TODO: rewrite this test, right now it only works for a single queue
 // (the context is only created for one device)
 TEST_P(urEnqueueKernelLaunchMultiDeviceTest, KernelLaunchReadDifferentQueues) {
-  UUR_KNOWN_FAILURE_ON(uur::LevelZero{}, uur::LevelZeroV2{});
+  UUR_KNOWN_FAILURE_ON(uur::CUDA{}, uur::LevelZero{}, uur::LevelZeroV2{});
 
   uur::KernelLaunchHelper helper =
       uur::KernelLaunchHelper{platform, context, kernel, queues[0]};
diff --git a/test/conformance/enqueue/urEnqueueKernelLaunchAndMemcpyInOrder.cpp b/test/conformance/enqueue/urEnqueueKernelLaunchAndMemcpyInOrder.cpp
index c97471aee8..1102892d15 100644
--- a/test/conformance/enqueue/urEnqueueKernelLaunchAndMemcpyInOrder.cpp
+++ b/test/conformance/enqueue/urEnqueueKernelLaunchAndMemcpyInOrder.cpp
@@ -155,13 +155,14 @@ struct urEnqueueKernelLaunchIncrementTest
 
   using Param = uur::BoolTestParam;
 
-  using urMultiQueueLaunchMemcpyTest<Param>::context;
   using urMultiQueueLaunchMemcpyTest<Param>::queues;
-  using urMultiQueueLaunchMemcpyTest<Param>::devices;
   using urMultiQueueLaunchMemcpyTest<Param>::kernels;
   using urMultiQueueLaunchMemcpyTest<Param>::SharedMem;
 
   void SetUp() override {
+    // We actually need a single device used multiple times for this test, as
+    // opposed to utilizing all available devices for the platform.
+    this->trueMultiDevice = false;
     UUR_RETURN_ON_FATAL_FAILURE(
         urMultiQueueLaunchMemcpyTest<Param>::
             SetUp()); // Use single device, duplicated numOps times
@@ -344,9 +345,28 @@ TEST_P(urEnqueueKernelLaunchIncrementMultiDeviceTest, Success) {
   }
 }
 
-using urEnqueueKernelLaunchIncrementMultiDeviceMultiThreadTest =
-    urEnqueueKernelLaunchIncrementMultiDeviceTestWithParam<
-        std::tuple<uur::BoolTestParam, uur::BoolTestParam>>;
+struct urEnqueueKernelLaunchIncrementMultiDeviceMultiThreadTest
+    : urEnqueueKernelLaunchIncrementMultiDeviceTestWithParam<
+          std::tuple<uur::BoolTestParam, uur::BoolTestParam>> {
+  using Param = std::tuple<uur::BoolTestParam, uur::BoolTestParam>;
+
+  using urEnqueueKernelLaunchIncrementMultiDeviceTestWithParam<Param>::devices;
+  using urEnqueueKernelLaunchIncrementMultiDeviceTestWithParam<Param>::queues;
+  using urEnqueueKernelLaunchIncrementMultiDeviceTestWithParam<Param>::kernels;
+  using urEnqueueKernelLaunchIncrementMultiDeviceTestWithParam<
+      Param>::SharedMem;
+
+  void SetUp() override {
+    useEvents = std::get<0>(getParam()).value;
+    queuePerThread = std::get<1>(getParam()).value;
+    // With !queuePerThread this becomes a test on a single device
+    this->trueMultiDevice = queuePerThread;
+    urEnqueueKernelLaunchIncrementMultiDeviceTestWithParam<Param>::SetUp();
+  }
+
+  bool useEvents;
+  bool queuePerThread;
+};
 
 UUR_PLATFORM_TEST_SUITE_WITH_PARAM(
     urEnqueueKernelLaunchIncrementMultiDeviceMultiThreadTest,
@@ -356,11 +376,7 @@ UUR_PLATFORM_TEST_SUITE_WITH_PARAM(
     printParams);
 
 // Enqueue kernelLaunch concurrently from multiple threads
-// With !queuePerThread this becomes a test on a single device
 TEST_P(urEnqueueKernelLaunchIncrementMultiDeviceMultiThreadTest, Success) {
-  auto useEvents = std::get<0>(getParam()).value;
-  auto queuePerThread = std::get<1>(getParam()).value;
-
   if (!queuePerThread) {
     UUR_KNOWN_FAILURE_ON(uur::LevelZero{}, uur::LevelZeroV2{});
   }
@@ -371,11 +387,11 @@ TEST_P(urEnqueueKernelLaunchIncrementMultiDeviceMultiThreadTest, Success) {
   static constexpr size_t numOpsPerThread = 6;
 
   for (size_t i = 0; i < numThreads; i++) {
-    threads.emplace_back([this, i, queuePerThread, useEvents]() {
+    threads.emplace_back([this, i]() {
       constexpr size_t global_offset = 0;
       constexpr size_t n_dimensions = 1;
 
-      auto queue = queuePerThread ? queues[i] : queues.back();
+      auto queue = this->queuePerThread ? queues[i] : queues.back();
       auto kernel = kernels[i];
       auto sharedPtr = SharedMem[i];
 
@@ -385,7 +401,7 @@ TEST_P(urEnqueueKernelLaunchIncrementMultiDeviceMultiThreadTest, Success) {
       ur_event_handle_t *lastEvent = nullptr;
       ur_event_handle_t *signalEvent = nullptr;
 
-      if (useEvents) {
+      if (this->useEvents) {
         waitNum = j > 0 ? 1 : 0;
         lastEvent = j > 0 ? Events[j - 1].ptr() : nullptr;
         signalEvent = Events[j].ptr();
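Side note on the final hunks: useEvents and queuePerThread move from locals in the test body to fixture members initialized in SetUp(), so each worker lambda captures only this and the loop index instead of a growing list of locals. A minimal sketch of that capture pattern follows; Fixture is a hypothetical stand-in for the test class and is not part of the patch.

#include <cstddef>
#include <thread>
#include <vector>

struct Fixture {
  bool useEvents = false;
  bool queuePerThread = false;

  void run(std::size_t numThreads) {
    std::vector<std::thread> threads;
    for (std::size_t i = 0; i < numThreads; i++) {
      // Capturing `this` exposes all members, so the capture list no longer
      // needs updating every time a parameter moves into the fixture.
      threads.emplace_back([this, i]() {
        std::size_t queueIndex = this->queuePerThread ? i : 0;
        (void)queueIndex; // a real test would pick queues[queueIndex]
        if (this->useEvents) {
          // wire up wait/signal events here
        }
      });
    }
    for (auto &t : threads) {
      t.join();
    }
  }
};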