Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Aaron/cuda runner experiments #2718

Closed
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/bandit.yml
Original file line number Diff line number Diff line change
@@ -12,6 +12,7 @@ permissions:

jobs:
bandit:
if: false
name: Bandit
strategy:
matrix:
4 changes: 1 addition & 3 deletions .github/workflows/benchmarks-reusable.yml
Original file line number Diff line number Diff line change
@@ -219,14 +219,12 @@ jobs:
--adapter ${{ matrix.adapter.str_name }}
--compute-runtime ${{ inputs.compute_runtime_commit }}
--build-igc
--compare baseline
${{ inputs.upload_report && '--output-html' || '' }}
${{ inputs.pr_no != 0 && '--output-markdown' || '' }}
${{ inputs.bench_script_params }}

- name: Print benchmark results
run: |
cat ${{ github.workspace }}/ur-repo/benchmark_results.md || true
cat ${{ github.workspace }}/ur-repo/benchmark_results.md

- name: Add comment to PR
uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
2 changes: 1 addition & 1 deletion .github/workflows/benchmarks.yml
Original file line number Diff line number Diff line change
@@ -24,7 +24,7 @@ on:
type: number
required: true
bench_script_params:
description: Parameters passed to the script executing benchmark
description: Parameters passed to script executing benchmark
type: string
required: false
default: ''
114 changes: 114 additions & 0 deletions .github/workflows/build-hw-reusable.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
---
# Reusable workflow: build Unified Runtime for one adapter on self-hosted HW
# and run the conformance (CTS) suite against it.
name: Build - Adapters on HW - Reusable

on:
  workflow_call:
    inputs:
      adapter_name:
        required: true
        type: string
      other_adapter_name:
        required: false
        type: string
        default: ""
      runner_name:
        required: true
        type: string
      platform:
        description: "Platform string, `UR_CTS_ADAPTER_PLATFORM` will be set to this."
        required: false
        type: string
        default: ""
      static_loader:
        required: false
        type: string
        # quoted so a generic YAML parser never reads OFF as a boolean
        default: "OFF"
      static_adapter:
        required: false
        type: string
        default: "OFF"

permissions:
  contents: read

env:
  UR_LOG_CUDA: "level:error;flush:error"
  UR_LOG_HIP: "level:error;flush:error"
  UR_LOG_LEVEL_ZERO: "level:error;flush:error"
  UR_LOG_NATIVE_CPU: "level:error;flush:error"
  UR_LOG_OPENCL: "level:error;flush:error"

jobs:
  adapter-build-hw:
    name: Build & CTS
    if: github.repository == 'oneapi-src/unified-runtime' # run only on upstream; forks won't have the HW
    strategy:
      matrix:
        adapter: [
          {
            name: "${{inputs.adapter_name}}",
            other_name: "${{inputs.other_adapter_name}}",
            platform: "${{inputs.platform}}",
            static_Loader: "${{inputs.static_loader}}",
            # FIX: this previously forwarded inputs.static_loader, so the
            # static_adapter input was silently ignored by the build.
            static_adapter: "${{inputs.static_adapter}}"
          }
        ]
        build_type: [Release]
        compiler: [{c: gcc, cxx: g++}]

    # NOTE(review): inputs.runner_name is declared (required) but unused; the
    # runner label is hardcoded for this CUDA runner experiment — confirm
    # whether this should be ${{inputs.runner_name}} before merging.
    runs-on: CUDA_E2E

    steps:
      - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1

      - name: Install pip packages
        run: pip install -r third_party/requirements.txt

      - name: Download DPC++
        run: |
          wget -O ${{github.workspace}}/dpcpp_compiler.tar.gz https://github.com/intel/llvm/releases/download/nightly-2024-12-12/sycl_linux.tar.gz
          mkdir dpcpp_compiler
          tar -xvf ${{github.workspace}}/dpcpp_compiler.tar.gz -C dpcpp_compiler

      - name: Configure CMake
        run: >
          cmake
          -B${{github.workspace}}/build
          -DCMAKE_C_COMPILER=${{matrix.compiler.c}}
          -DCMAKE_CXX_COMPILER=${{matrix.compiler.cxx}}
          -DCMAKE_BUILD_TYPE=${{matrix.build_type}}
          -DUR_ENABLE_TRACING=ON
          -DUR_DEVELOPER_MODE=ON
          -DUR_BUILD_TESTS=ON
          -DUR_BUILD_ADAPTER_${{matrix.adapter.name}}=ON
          -DUR_CONFORMANCE_TEST_LOADER=${{ matrix.adapter.other_name != '' && 'ON' || 'OFF' }}
          ${{ matrix.adapter.other_name != '' && format('-DUR_BUILD_ADAPTER_{0}=ON', matrix.adapter.other_name) || '' }}
          -DUR_STATIC_LOADER=${{matrix.adapter.static_Loader}}
          -DUR_STATIC_ADAPTER_${{matrix.adapter.name}}=${{matrix.adapter.static_adapter}}
          -DUR_DPCXX=${{github.workspace}}/dpcpp_compiler/bin/clang++
          -DUR_SYCL_LIBRARY_DIR=${{github.workspace}}/dpcpp_compiler/lib
          -DCMAKE_INSTALL_PREFIX=${{github.workspace}}/install
          ${{ matrix.adapter.name == 'HIP' && '-DUR_CONFORMANCE_AMD_ARCH=gfx1030' || '' }}
          ${{ matrix.adapter.name == 'HIP' && '-DUR_HIP_PLATFORM=AMD' || '' }}

      - name: Build
        # This is so that device binaries can find the sycl runtime library
        run: cmake --build ${{github.workspace}}/build -j $(nproc)

      - name: Install
        # This is to check that install command does not fail
        run: cmake --install ${{github.workspace}}/build

      - name: Test adapter specific
        working-directory: ${{github.workspace}}/build
        run: ctest -C ${{matrix.build_type}} --output-on-failure -L "adapter-specific" -E "memcheck" --timeout 180
        # Don't run adapter specific tests when building multiple adapters
        if: ${{ matrix.adapter.other_name == '' }}

      - name: Test adapters
        working-directory: ${{github.workspace}}/build
        run: env UR_CTS_ADAPTER_PLATFORM="${{matrix.adapter.platform}}" ctest -C ${{matrix.build_type}} --output-on-failure -L "conformance" --timeout 180

      - name: Get information about platform
        if: ${{ always() }}
        run: .github/scripts/get_system_info.sh
1 change: 1 addition & 0 deletions .github/workflows/codeql.yml
Original file line number Diff line number Diff line change
@@ -11,6 +11,7 @@ permissions:

jobs:
analyze-ubuntu:
if: false
name: Analyze on Ubuntu
runs-on: ${{ github.repository_owner == 'oneapi-src' && 'intel-ubuntu-22.04' || 'ubuntu-latest' }}
permissions:
36 changes: 36 additions & 0 deletions .github/workflows/e2e.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Top-level dispatcher for the SYCL end-to-end test workflows.
name: SYCL E2E

on: [push, pull_request]

concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
  cancel-in-progress: true

permissions:
  contents: read
  pull-requests: write

jobs:

  e2e-level-zero:
    name: Level Zero
    permissions:
      contents: read
      pull-requests: write
    uses: ./.github/workflows/e2e_level_zero.yml

  e2e-opencl:
    name: OpenCL
    permissions:
      contents: read
      pull-requests: write
    uses: ./.github/workflows/e2e_opencl.yml

  # Causes hangs: https://github.com/oneapi-src/unified-runtime/issues/2398
  #e2e-cuda:
  #  name: CUDA
  #  permissions:
  #    contents: read
  #    pull-requests: write
  #  needs: [ubuntu-build, cuda]
  #  uses: ./.github/workflows/e2e_cuda.yml
220 changes: 220 additions & 0 deletions .github/workflows/e2e_core.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,220 @@
name: E2E build & run

on:
  # This workflow can only be triggered by other workflows,
  # for example by e2e_cuda.yml or e2e_opencl.yml.
  workflow_call:
    # Acceptable input from adapter-specific workflows.
    inputs:
      name:
        description: Adapter name
        type: string
        required: true
      str_name:
        description: Formatted adapter name
        type: string
        required: true
      prefix:
        description: Prefix for cmake parameter
        type: string
        required: true
      config:
        description: Params for sycl configuration
        type: string
        required: true
      unit:
        description: Test unit (cpu/gpu)
        type: string
        required: true
      runner_tag:
        # FIX: typo "defifned" -> "defined"
        description: Tag defined for the runner
        type: string
        required: true
      xfail:
        description: Allow test failures
        type: string
        required: false
      xfail_not:
        description: Not xfail
        type: string
        required: false
      filter_out:
        description: Tests to filter out completely
        type: string
        required: false
      extra_lit_flags:
        description: Additional llvm-lit flags to use
        type: string
        required: false

permissions:
  contents: read
  pull-requests: write

jobs:
  changed-files:
    # NOTE(review): this job is disabled; because e2e-build-hw gates on
    # needs.changed-files.outputs.any_changed == 'true', the whole workflow is
    # effectively skipped while this stays false — confirm this is intended
    # for the runner experiment.
    if: false
    name: Check for changed files
    runs-on: ${{ github.repository_owner == 'oneapi-src' && 'intel-ubuntu-22.04' || 'ubuntu-latest' }}
    outputs:
      any_changed: ${{ steps.get-changed.outputs.any_changed }}
    steps:
      - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
      - name: Get changed files
        id: get-changed
        uses: tj-actions/changed-files@d6babd6899969df1a11d14c368283ea4436bca78 # v44.5.2
        with:
          files: |
            source/adapters/${{inputs.str_name}}/**
            source/loader/**
            .github/workflows/e2e*

  e2e-build-hw:
    # We want to run the job only if there are changes in the specific adapter
    if: needs.changed-files.outputs.any_changed == 'true'
    name: Build SYCL, UR, run E2E
    needs: changed-files
    permissions:
      contents: read
      pull-requests: write

    # Allow failures, since SYCL tests and API may be not stable
    continue-on-error: true
    strategy:
      matrix:
        adapter: [
          {name: "${{inputs.name}}",
           str_name: "${{inputs.str_name}}",
           prefix: "${{inputs.prefix}}",
           config: "${{inputs.config}}",
           unit: "${{inputs.unit}}",
           extra_lit_flags: "${{inputs.extra_lit_flags}}"},
        ]
        build_type: [Release]
        compiler: [{c: clang, cxx: clang++}]

    runs-on: ${{inputs.runner_tag}}

    steps:
      # Workspace on self-hosted runners is not cleaned automatically.
      # We have to delete the files created outside of using actions.
      - name: Cleanup self-hosted workspace
        if: always()
        run: |
          ls -la ./
          rm -rf ./* || true

      - name: Checkout UR
        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
        with:
          path: ur-repo

      - name: Checkout SYCL
        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
        with:
          repository: intel/llvm
          ref: refs/heads/sycl
          path: sycl-repo

      - name: Set CUDA env vars
        if: matrix.adapter.name == 'CUDA'
        run: |
          echo "CUDA_LIB_PATH=/usr/local/cuda/lib64/stubs" >> $GITHUB_ENV
          echo "LD_LIBRARY_PATH=/usr/local/cuda/compat/:/usr/local/cuda/lib64:$LD_LIBRARY_PATH" >> $GITHUB_ENV

      - name: Configure SYCL
        run: >
          python3 sycl-repo/buildbot/configure.py
          -t ${{matrix.build_type}}
          -o ${{github.workspace}}/sycl_build
          --cmake-gen "Ninja"
          --ci-defaults ${{matrix.adapter.config}}
          --cmake-opt="-DLLVM_INSTALL_UTILS=ON"
          --cmake-opt="-DSYCL_PI_TESTS=OFF"
          --cmake-opt="-DSYCL_UR_USE_FETCH_CONTENT=OFF"
          --cmake-opt="-DSYCL_UR_SOURCE_DIR=${{github.workspace}}/ur-repo/"
          --cmake-opt=-DCMAKE_C_COMPILER_LAUNCHER=ccache
          --cmake-opt=-DCMAKE_CXX_COMPILER_LAUNCHER=ccache

      - name: Build SYCL
        run: cmake --build ${{github.workspace}}/sycl_build -j

      - name: Set extra llvm-lit options
        if: matrix.adapter.extra_lit_flags != ''
        run: echo "LIT_OPTS=${{matrix.adapter.extra_lit_flags}}" >> $GITHUB_ENV

      - name: Run check-sycl
        # Remove after fixing SYCL test :: abi/layout_handler.cpp
        # This issue does not affect further execution of e2e with UR.
        continue-on-error: true
        run: cmake --build ${{github.workspace}}/sycl_build --target check-sycl

      - name: Set additional env. vars
        run: |
          echo "${{github.workspace}}/sycl_build/bin" >> $GITHUB_PATH
          echo "LD_LIBRARY_PATH=${{github.workspace}}/sycl_build/lib:$LD_LIBRARY_PATH" >> $GITHUB_ENV

      # Running (newly built) sycl-ls sets up some extra variables
      - name: Setup SYCL variables
        run: |
          which clang++ sycl-ls
          SYCL_UR_TRACE=-1 sycl-ls

      - name: Build e2e tests
        run: >
          cmake
          -GNinja
          -B ${{github.workspace}}/build-e2e/
          -S ${{github.workspace}}/sycl-repo/sycl/test-e2e/
          -DSYCL_TEST_E2E_TARGETS="${{matrix.adapter.prefix}}${{matrix.adapter.str_name}}:${{matrix.adapter.unit}}"
          -DCMAKE_CXX_COMPILER="$(which clang++)"
          -DLLVM_LIT="${{github.workspace}}/sycl-repo/llvm/utils/lit/lit.py"

      - name: Set LIT_XFAIL
        if: inputs.xfail != ''
        run: echo "LIT_XFAIL=${{inputs.xfail}}" >> $GITHUB_ENV

      - name: Set LIT_FILTER_OUT
        if: inputs.filter_out != ''
        run: echo "LIT_FILTER_OUT=${{inputs.filter_out}}" >> $GITHUB_ENV

      - name: Set LIT_XFAIL_NOT
        if: inputs.xfail_not != ''
        run: echo "LIT_XFAIL_NOT=${{inputs.xfail_not}}" >> $GITHUB_ENV

      # TODO: remove once intel/llvm lit tests can properly recognize the GPU
      - name: Configure hardware platform feature for L0
        if: matrix.adapter.name == 'L0'
        run: |
          sed -i '/import lit.llvm/i config.available_features.add("gpu-intel-pvc-1T")' build-e2e/lit.site.cfg.py
          sed -i '/import lit.llvm/i config.available_features.add("gpu-intel-pvc")' build-e2e/lit.site.cfg.py

      - name: Run e2e tests
        id: tests
        run: ninja -C build-e2e check-sycl-e2e || echo "e2e tests have failed. Ignoring failure."

      - name: Get information about platform
        if: ${{ always() }}
        working-directory: ${{github.workspace}}/ur-repo
        run: .github/scripts/get_system_info.sh

      # FIXME: Requires pull-request: write permissions but this is only granted
      # on pull requests from forks if using pull_request_target workflow
      # trigger but not the pull_request trigger..
      # - name: Add comment to PR
      #   uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
      #   if: ${{ always() }}
      #   with:
      #     script: |
      #       const adapter = '${{ matrix.adapter.name }}';
      #       const url = '${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}';
      #       const test_status = '${{ steps.tests.outcome }}';
      #       const job_status = '${{ job.status }}';
      #       const body = `E2E ${adapter} build:\n${url}\nJob status: ${job_status}. Test status: ${test_status}`;
      #
      #       github.rest.issues.createComment({
      #         issue_number: context.issue.number,
      #         owner: context.repo.owner,
      #         repo: context.repo.repo,
      #         body: body
      #       })
24 changes: 24 additions & 0 deletions .github/workflows/e2e_cuda.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Thin wrapper: invokes the shared e2e_core workflow with CUDA parameters.
name: E2E Cuda

on:
  workflow_call:

permissions:
  contents: read
  pull-requests: write

jobs:
  e2e-build-hw:
    # Run only on upstream; forks will not have the HW.
    if: github.repository == 'oneapi-src/unified-runtime'
    name: Start e2e job
    # Use core flow, run it with cuda specific parameters.
    uses: ./.github/workflows/e2e_core.yml
    with:
      name: "CUDA"
      runner_tag: "CUDA_E2E"
      str_name: "cuda"
      prefix: "ext_oneapi_"
      config: "--cuda"
      unit: "gpu"
      extra_lit_flags: "-sv --max-time=3600"
      xfail: "Regression/device_num.cpp"
31 changes: 31 additions & 0 deletions .github/workflows/e2e_level_zero.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Thin wrapper: invokes the shared e2e_core workflow with Level Zero parameters.
name: E2E Level Zero

on:
  workflow_call:

permissions:
  contents: read
  pull-requests: write

jobs:
  e2e-build-hw:
    # Run only on upstream; forks will not have the HW.
    if: github.repository == 'oneapi-src/unified-runtime'
    name: Start e2e job
    # Use core flow, run it with L0 specific parameters.
    uses: ./.github/workflows/e2e_core.yml
    with:
      name: "L0"
      runner_tag: "L0_E2E"
      str_name: "level_zero"
      prefix: "ext_oneapi_"
      config: ""
      unit: "gpu"
      # Failing tests
      xfail: "InvokeSimd/Regression/call_vadd_1d_spill.cpp;InvokeSimd/Regression/ImplicitSubgroup/call_vadd_1d_spill.cpp;ESIMD/mask_expand_load.cpp;Matrix/joint_matrix_prefetch.cpp;ESIMD/mask_expand_load.cpp;Matrix/SPVCooperativeMatrix/joint_matrix_prefetch.cpp;Matrix/joint_matrix_bf16_fill_k_cache_prefetch.cpp;Matrix/SPVCooperativeMatrix/element_wise_ops.cpp;"
      # Unexpectedly Passed Tests
      xfail_not: ""
      # Flaky tests
      filter_out: "Basic/accessor/accessor.cpp|DeviceArchitecture/device_architecture_comparison_on_device_aot.cpp|Graph/Explicit/interop-level-zero-launch-kernel.cpp|Graph/RecordReplay/interop-level-zero-launch-kernel.cpp|syclcompat/launch/launch_policy_lmem.cpp"
      # These runners by default spawn upwards of 260 workers.
      # We also add a time out just in case some test hangs
      extra_lit_flags: "--param gpu-intel-pvc=True --param gpu-intel-pvc-1T=True -sv -j 100 --max-time=3600"
24 changes: 24 additions & 0 deletions .github/workflows/e2e_opencl.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Thin wrapper: invokes the shared e2e_core workflow with OpenCL parameters.
name: E2E OpenCL

on:
  workflow_call:

permissions:
  contents: read
  pull-requests: write

jobs:
  e2e-build-hw:
    # Run only on upstream; forks will not have the HW.
    if: github.repository == 'oneapi-src/unified-runtime'
    name: Start e2e job
    # Use core flow, run it with OpenCL specific parameters.
    uses: ./.github/workflows/e2e_core.yml
    with:
      name: "OPENCL"
      runner_tag: "OPENCL"
      str_name: "opencl"
      prefix: ""
      config: ""
      unit: "cpu"
      xfail: "AOT/double.cpp;AOT/half.cpp;AOT/reqd-sg-size.cpp;Basic/built-ins/marray_geometric.cpp;KernelCompiler/kernel_compiler_spirv.cpp;KernelCompiler/opencl_queries.cpp;NonUniformGroups/ballot_group.cpp;NonUniformGroups/ballot_group_algorithms.cpp;NonUniformGroups/fixed_size_group_algorithms.cpp;NonUniformGroups/opportunistic_group.cpp;NonUniformGroups/opportunistic_group_algorithms.cpp;NonUniformGroups/tangle_group.cpp;NonUniformGroups/tangle_group_algorithms.cpp"
      extra_lit_flags: "-sv --max-time=3600"
2 changes: 1 addition & 1 deletion .github/workflows/multi_device.yml
Original file line number Diff line number Diff line change
@@ -13,7 +13,7 @@ permissions:
jobs:
examples:
name: Multi Device testing
if: github.repository == 'oneapi-src/unified-runtime' # run only on upstream; forks won't have the HW
if: false
strategy:
matrix:
adapter: [
66 changes: 66 additions & 0 deletions .github/workflows/source-checks.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
# Reusable source-level checks: docs generation, license headers, and
# spec-generated sources, run on both Linux and Windows.
on:
  workflow_call:

permissions:
  contents: read

jobs:
  source-checks:
    # Currently disabled (experiment branch).
    if: false
    strategy:
      matrix:
        os: ['ubuntu-22.04', 'windows-2022']

    runs-on: ${{matrix.os}}

    steps:
      - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1

      - uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0
        with:
          # Quoted so the version is never parsed as a float.
          python-version: "3.9"

      - name: Install pip packages
        run: pip install -r third_party/requirements.txt

      - name: "[Lin] Install doxygen"
        if: matrix.os == 'ubuntu-22.04'
        run: |
          sudo apt-get update
          sudo apt-get install -y doxygen

      - name: "[Win] Install doxygen"
        if: matrix.os == 'windows-2022'
        run: |
          $WorkingDir = $PWD.Path
          Invoke-WebRequest -Uri https://github.com/doxygen/doxygen/releases/download/Release_1_9_8/doxygen-1.9.8.windows.x64.bin.zip -OutFile "$WorkingDir\doxygen.zip"
          Expand-Archive -Path "$WorkingDir\doxygen.zip"
          Add-Content $env:GITHUB_PATH "$WorkingDir\doxygen"

      - name: "[Lin] Install hwloc"
        if: matrix.os == 'ubuntu-22.04'
        run: .github/scripts/install_hwloc.sh

      - name: "[Win] Install hwloc"
        if: matrix.os == 'windows-2022'
        run: vcpkg install hwloc:x64-windows

      - name: Configure CMake
        env:
          VCPKG_PATH: "C:/vcpkg/packages/hwloc_x64-windows"
        run: >
          cmake
          -B${{github.workspace}}/build
          -DCMAKE_PREFIX_PATH="${{env.VCPKG_PATH}}"
          -DUR_ENABLE_TRACING=OFF
          -DCMAKE_BUILD_TYPE=Debug
          -DUR_BUILD_TESTS=OFF
          -DUR_FORMAT_CPP_STYLE=ON

      # Verifying license should be enough on a single OS
      - name: Verify that each source file contains a license
        if: matrix.os == 'ubuntu-22.04'
        run: cmake --build ${{github.workspace}}/build --target verify-licenses

      - name: Generate source from spec, check for uncommitted diff
        run: cmake --build ${{github.workspace}}/build --target check-generated
1 change: 1 addition & 0 deletions .github/workflows/trivy.yml
Original file line number Diff line number Diff line change
@@ -18,6 +18,7 @@ permissions:

jobs:
linux:
if: false
name: Trivy
runs-on: ${{ github.repository_owner == 'oneapi-src' && 'intel-ubuntu-22.04' || 'ubuntu-latest' }}
permissions:
18 changes: 18 additions & 0 deletions .github/workflows/unified-runtime.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Main CI entry point: dispatches the reusable HW build/CTS workflow per adapter.
name: Unified Runtime

on: [push, pull_request]

concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
  cancel-in-progress: true

permissions:
  contents: read

jobs:
  cuda:
    name: CUDA
    uses: ./.github/workflows/build-hw-reusable.yml
    with:
      adapter_name: CUDA
      runner_name: CUDA
31 changes: 0 additions & 31 deletions test/adapters/cuda/context_tests.cpp
Original file line number Diff line number Diff line change
@@ -43,37 +43,6 @@ TEST_P(cudaUrContextCreateTest, CreateWithChildThread) {
callContextFromOtherThread.join();
}

TEST_P(cudaUrContextCreateTest, ActiveContext) {
uur::raii::Context context = nullptr;
ASSERT_SUCCESS(urContextCreate(1, &device, nullptr, context.ptr()));
ASSERT_NE(context, nullptr);

uur::raii::Queue queue = nullptr;
ur_queue_properties_t queue_props{UR_STRUCTURE_TYPE_QUEUE_PROPERTIES, nullptr,
0};
ASSERT_SUCCESS(urQueueCreate(context, device, &queue_props, queue.ptr()));
ASSERT_NE(queue, nullptr);

// check that the queue has the correct context
ASSERT_EQ(context, queue->getContext());

// create a buffer
uur::raii::Mem buffer = nullptr;
ASSERT_SUCCESS(urMemBufferCreate(context, UR_MEM_FLAG_READ_WRITE, 1024,
nullptr, buffer.ptr()));
ASSERT_NE(buffer, nullptr);

// check that the context is now the active CUDA context
CUcontext cudaCtx = nullptr;
ASSERT_SUCCESS_CUDA(cuCtxGetCurrent(&cudaCtx));
ASSERT_NE(cudaCtx, nullptr);

ur_native_handle_t native_context = 0;
ASSERT_SUCCESS(urContextGetNativeHandle(context, &native_context));
ASSERT_NE(reinterpret_cast<CUcontext>(native_context), nullptr);
ASSERT_EQ(cudaCtx, reinterpret_cast<CUcontext>(native_context));
}

TEST_P(cudaUrContextCreateTest, ContextLifetimeExisting) {
// start by setting up a CUDA context on the thread
CUcontext original;
5 changes: 3 additions & 2 deletions test/adapters/cuda/memory_tests.cpp
Original file line number Diff line number Diff line change
@@ -14,11 +14,12 @@ TEST_P(cudaMemoryTest, urMemBufferNoActiveContext) {
constexpr size_t memSize = 1024u;

CUcontext current = nullptr;
do {
ASSERT_SUCCESS_CUDA(cuCtxGetCurrent(&current));
while (current != nullptr) {
CUcontext oldContext = nullptr;
ASSERT_SUCCESS_CUDA(cuCtxPopCurrent(&oldContext));
ASSERT_SUCCESS_CUDA(cuCtxGetCurrent(&current));
} while (current != nullptr);
}

uur::raii::Mem mem;
ASSERT_SUCCESS(urMemBufferCreate(context, UR_MEM_FLAG_READ_WRITE, memSize,
11 changes: 10 additions & 1 deletion test/conformance/enqueue/helpers.h
Original file line number Diff line number Diff line change
@@ -203,7 +203,15 @@ struct urMultiQueueMultiDeviceTestWithParam
urContextCreate(devices.size(), devices.data(), nullptr, &context));

// Duplicate our devices until we hit the minimum size specified.
auto srcDevices = devices;
std::vector<ur_device_handle_t> srcDevices;
// If the test actually only wants one device duplicated a bunch of times
// we take devices[0] and discard any other devices that were discovered.
if (trueMultiDevice) {
srcDevices = devices;
} else {
srcDevices.push_back(devices[0]);
devices.clear();
}
while (devices.size() < minDevices) {
devices.insert(devices.end(), srcDevices.begin(), srcDevices.end());
}
@@ -224,6 +232,7 @@ struct urMultiQueueMultiDeviceTestWithParam

ur_context_handle_t context;
std::vector<ur_queue_handle_t> queues;
bool trueMultiDevice = true;
};

} // namespace uur
2 changes: 1 addition & 1 deletion test/conformance/enqueue/urEnqueueKernelLaunch.cpp
Original file line number Diff line number Diff line change
@@ -565,7 +565,7 @@ UUR_INSTANTIATE_PLATFORM_TEST_SUITE(urEnqueueKernelLaunchMultiDeviceTest);
// TODO: rewrite this test, right now it only works for a single queue
// (the context is only created for one device)
TEST_P(urEnqueueKernelLaunchMultiDeviceTest, KernelLaunchReadDifferentQueues) {
UUR_KNOWN_FAILURE_ON(uur::LevelZero{}, uur::LevelZeroV2{});
UUR_KNOWN_FAILURE_ON(uur::CUDA{}, uur::LevelZero{}, uur::LevelZeroV2{});

uur::KernelLaunchHelper helper =
uur::KernelLaunchHelper{platform, context, kernel, queues[0]};
Original file line number Diff line number Diff line change
@@ -155,13 +155,14 @@ struct urEnqueueKernelLaunchIncrementTest

using Param = uur::BoolTestParam;

using urMultiQueueLaunchMemcpyTest<numOps, Param>::context;
using urMultiQueueLaunchMemcpyTest<numOps, Param>::queues;
using urMultiQueueLaunchMemcpyTest<numOps, Param>::devices;
using urMultiQueueLaunchMemcpyTest<numOps, Param>::kernels;
using urMultiQueueLaunchMemcpyTest<numOps, Param>::SharedMem;

void SetUp() override {
// We actually need a single device used multiple times for this test, as
// opposed to utilizing all available devices for the platform.
this->trueMultiDevice = false;
UUR_RETURN_ON_FATAL_FAILURE(
urMultiQueueLaunchMemcpyTest<numOps, Param>::
SetUp()); // Use single device, duplicated numOps times
@@ -344,9 +345,28 @@ TEST_P(urEnqueueKernelLaunchIncrementMultiDeviceTest, Success) {
}
}

using urEnqueueKernelLaunchIncrementMultiDeviceMultiThreadTest =
urEnqueueKernelLaunchIncrementMultiDeviceTestWithParam<
std::tuple<uur::BoolTestParam, uur::BoolTestParam>>;
struct urEnqueueKernelLaunchIncrementMultiDeviceMultiThreadTest
: urEnqueueKernelLaunchIncrementMultiDeviceTestWithParam<
std::tuple<uur::BoolTestParam, uur::BoolTestParam>> {
using Param = std::tuple<uur::BoolTestParam, uur::BoolTestParam>;

using urEnqueueKernelLaunchIncrementMultiDeviceTestWithParam<Param>::devices;
using urEnqueueKernelLaunchIncrementMultiDeviceTestWithParam<Param>::queues;
using urEnqueueKernelLaunchIncrementMultiDeviceTestWithParam<Param>::kernels;
using urEnqueueKernelLaunchIncrementMultiDeviceTestWithParam<
Param>::SharedMem;

void SetUp() override {
useEvents = std::get<0>(getParam()).value;
queuePerThread = std::get<1>(getParam()).value;
// With !queuePerThread this becomes a test on a single device
this->trueMultiDevice = queuePerThread;
urEnqueueKernelLaunchIncrementMultiDeviceTestWithParam<Param>::SetUp();
}

bool useEvents;
bool queuePerThread;
};

UUR_PLATFORM_TEST_SUITE_WITH_PARAM(
urEnqueueKernelLaunchIncrementMultiDeviceMultiThreadTest,
@@ -356,11 +376,7 @@ UUR_PLATFORM_TEST_SUITE_WITH_PARAM(
printParams<urEnqueueKernelLaunchIncrementMultiDeviceMultiThreadTest>);

// Enqueue kernelLaunch concurrently from multiple threads
// With !queuePerThread this becomes a test on a single device
TEST_P(urEnqueueKernelLaunchIncrementMultiDeviceMultiThreadTest, Success) {
auto useEvents = std::get<0>(getParam()).value;
auto queuePerThread = std::get<1>(getParam()).value;

if (!queuePerThread) {
UUR_KNOWN_FAILURE_ON(uur::LevelZero{}, uur::LevelZeroV2{});
}
@@ -371,11 +387,11 @@ TEST_P(urEnqueueKernelLaunchIncrementMultiDeviceMultiThreadTest, Success) {
static constexpr size_t numOpsPerThread = 6;

for (size_t i = 0; i < numThreads; i++) {
threads.emplace_back([this, i, queuePerThread, useEvents]() {
threads.emplace_back([this, i]() {
constexpr size_t global_offset = 0;
constexpr size_t n_dimensions = 1;

auto queue = queuePerThread ? queues[i] : queues.back();
auto queue = this->queuePerThread ? queues[i] : queues.back();
auto kernel = kernels[i];
auto sharedPtr = SharedMem[i];

@@ -385,7 +401,7 @@ TEST_P(urEnqueueKernelLaunchIncrementMultiDeviceMultiThreadTest, Success) {
ur_event_handle_t *lastEvent = nullptr;
ur_event_handle_t *signalEvent = nullptr;

if (useEvents) {
if (this->useEvents) {
waitNum = j > 0 ? 1 : 0;
lastEvent = j > 0 ? Events[j - 1].ptr() : nullptr;
signalEvent = Events[j].ptr();