diff --git a/.github/workflows/bandit.yml b/.github/workflows/bandit.yml
index 124046d258..c0eb8c1d55 100644
--- a/.github/workflows/bandit.yml
+++ b/.github/workflows/bandit.yml
@@ -12,6 +12,7 @@ permissions:
 jobs:
   bandit:
+    if: false
     name: Bandit
     strategy:
       matrix:
diff --git a/.github/workflows/benchmarks-reusable.yml b/.github/workflows/benchmarks-reusable.yml
index 07a76a9846..bfd6064ba1 100644
--- a/.github/workflows/benchmarks-reusable.yml
+++ b/.github/workflows/benchmarks-reusable.yml
@@ -219,14 +219,12 @@ jobs:
           --adapter ${{ matrix.adapter.str_name }}
           --compute-runtime ${{ inputs.compute_runtime_commit }}
           --build-igc
-          --compare baseline
           ${{ inputs.upload_report && '--output-html' || '' }}
-          ${{ inputs.pr_no != 0 && '--output-markdown' || '' }}
           ${{ inputs.bench_script_params }}
 
       - name: Print benchmark results
         run: |
-          cat ${{ github.workspace }}/ur-repo/benchmark_results.md || true
+          cat ${{ github.workspace }}/ur-repo/benchmark_results.md
 
       - name: Add comment to PR
         uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml
index d260227214..7de3926daf 100644
--- a/.github/workflows/benchmarks.yml
+++ b/.github/workflows/benchmarks.yml
@@ -24,7 +24,7 @@ on:
         type: number
         required: true
       bench_script_params:
-        description: Parameters passed to the script executing benchmark
+        description: Parameters passed to script executing benchmark
         type: string
         required: false
         default: ''
diff --git a/.github/workflows/build-hw-reusable.yml b/.github/workflows/build-hw-reusable.yml
new file mode 100644
index 0000000000..95242e7e93
--- /dev/null
+++ b/.github/workflows/build-hw-reusable.yml
@@ -0,0 +1,114 @@
+---
+name: Build - Adapters on HW - Reusable
+
+on:
+  workflow_call:
+    inputs:
+      adapter_name:
+        required: true
+        type: string
+      other_adapter_name:
+        required: false
+        type: string
+        default: ""
+      runner_name:
+        required: true
+        type: string
+      platform:
+        description: "Platform string, `UR_CTS_ADAPTER_PLATFORM` will be set to this."
+        required: false
+        type: string
+        default: ""
+      static_loader:
+        required: false
+        type: string
+        default: OFF
+      static_adapter:
+        required: false
+        type: string
+        default: OFF
+
+permissions:
+  contents: read
+
+env:
+  UR_LOG_CUDA: "level:error;flush:error"
+  UR_LOG_HIP: "level:error;flush:error"
+  UR_LOG_LEVEL_ZERO: "level:error;flush:error"
+  UR_LOG_NATIVE_CPU: "level:error;flush:error"
+  UR_LOG_OPENCL: "level:error;flush:error"
+
+jobs:
+  adapter-build-hw:
+    name: Build & CTS
+    if: github.repository == 'oneapi-src/unified-runtime' # run only on upstream; forks won't have the HW
+    strategy:
+      matrix:
+        adapter: [
+          {
+            name: "${{inputs.adapter_name}}",
+            other_name: "${{inputs.other_adapter_name}}",
+            platform: "${{inputs.platform}}",
+            static_Loader: "${{inputs.static_loader}}",
+            static_adapter: "${{inputs.static_adapter}}"
+          }
+        ]
+        build_type: [Release]
+        compiler: [{c: gcc, cxx: g++}]
+
+    runs-on: CUDA_E2E
+
+    steps:
+    - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
+
+    - name: Install pip packages
+      run: pip install -r third_party/requirements.txt
+
+    - name: Download DPC++
+      run: |
+        wget -O ${{github.workspace}}/dpcpp_compiler.tar.gz https://github.com/intel/llvm/releases/download/nightly-2024-12-12/sycl_linux.tar.gz
+        mkdir dpcpp_compiler
+        tar -xvf ${{github.workspace}}/dpcpp_compiler.tar.gz -C dpcpp_compiler
+
+    - name: Configure CMake
+      run: >
+        cmake
+        -B${{github.workspace}}/build
+        -DCMAKE_C_COMPILER=${{matrix.compiler.c}}
+        -DCMAKE_CXX_COMPILER=${{matrix.compiler.cxx}}
+        -DCMAKE_BUILD_TYPE=${{matrix.build_type}}
+        -DUR_ENABLE_TRACING=ON
+        -DUR_DEVELOPER_MODE=ON
+        -DUR_BUILD_TESTS=ON
+        -DUR_BUILD_ADAPTER_${{matrix.adapter.name}}=ON
+        -DUR_CONFORMANCE_TEST_LOADER=${{ matrix.adapter.other_name != '' && 'ON' || 'OFF' }}
+        ${{ matrix.adapter.other_name != '' && format('-DUR_BUILD_ADAPTER_{0}=ON', matrix.adapter.other_name) || '' }}
+        -DUR_STATIC_LOADER=${{matrix.adapter.static_Loader}}
+        -DUR_STATIC_ADAPTER_${{matrix.adapter.name}}=${{matrix.adapter.static_adapter}}
+        -DUR_DPCXX=${{github.workspace}}/dpcpp_compiler/bin/clang++
+        -DUR_SYCL_LIBRARY_DIR=${{github.workspace}}/dpcpp_compiler/lib
+        -DCMAKE_INSTALL_PREFIX=${{github.workspace}}/install
+        ${{ matrix.adapter.name == 'HIP' && '-DUR_CONFORMANCE_AMD_ARCH=gfx1030' || '' }}
+        ${{ matrix.adapter.name == 'HIP' && '-DUR_HIP_PLATFORM=AMD' || '' }}
+
+    - name: Build
+      # This is so that device binaries can find the sycl runtime library
+      run: cmake --build ${{github.workspace}}/build -j $(nproc)
+
+    - name: Install
+      # This is to check that the install command does not fail
+      run: cmake --install ${{github.workspace}}/build
+
+    - name: Test adapter specific
+      working-directory: ${{github.workspace}}/build
+      run: ctest -C ${{matrix.build_type}} --output-on-failure -L "adapter-specific" -E "memcheck" --timeout 180
+      # Don't run adapter-specific tests when building multiple adapters
+      if: ${{ matrix.adapter.other_name == '' }}
+
+    - name: Test adapters
+      working-directory: ${{github.workspace}}/build
+      run: env UR_CTS_ADAPTER_PLATFORM="${{matrix.adapter.platform}}" ctest -C ${{matrix.build_type}} --output-on-failure -L "conformance" --timeout 180
+
+    - name: Get information about platform
+      if: ${{ always() }}
+      run: .github/scripts/get_system_info.sh
diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml
index fdc5d0c0c0..77af22dfd4 100644
--- a/.github/workflows/codeql.yml
+++ b/.github/workflows/codeql.yml
@@ -11,6 +11,7 @@ permissions:
 jobs:
   analyze-ubuntu:
+    if: false
     name: Analyze on Ubuntu
     runs-on: ${{ github.repository_owner == 'oneapi-src' && 'intel-ubuntu-22.04' || 'ubuntu-latest' }}
     permissions:
diff --git a/.github/workflows/e2e.yml b/.github/workflows/e2e.yml
new file mode 100644
index 0000000000..d0b7040d83
--- /dev/null
+++ b/.github/workflows/e2e.yml
@@ -0,0 +1,36 @@
+name: SYCL E2E
+
+on: [push, pull_request]
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
+  cancel-in-progress: true
+
+permissions:
+  contents: read
+  pull-requests: write
+
+jobs:
+
+  e2e-level-zero:
+    name: Level Zero
+    permissions:
+      contents: read
+      pull-requests: write
+    uses: ./.github/workflows/e2e_level_zero.yml
+
+  e2e-opencl:
+    name: OpenCL
+    permissions:
+      contents: read
+      pull-requests: write
+    uses: ./.github/workflows/e2e_opencl.yml
+
+  # Causes hangs: https://github.com/oneapi-src/unified-runtime/issues/2398
+  #e2e-cuda:
+  #  name: CUDA
+  #  permissions:
+  #    contents: read
+  #    pull-requests: write
+  #  needs: [ubuntu-build, cuda]
+  #  uses: ./.github/workflows/e2e_cuda.yml
diff --git a/.github/workflows/e2e_core.yml b/.github/workflows/e2e_core.yml
new file mode 100644
index 0000000000..ff5e7bc17c
--- /dev/null
+++ b/.github/workflows/e2e_core.yml
@@ -0,0 +1,220 @@
+name: E2E build & run
+
+on:
+  # this workflow can only be triggered by other workflows,
+  # for example by: e2e_cuda.yml or e2e_opencl.yml
+  workflow_call:
+    # acceptable input from adapter-specific workflows
+    inputs:
+      name:
+        description: Adapter name
+        type: string
+        required: true
+      str_name:
+        description: Formatted adapter name
+        type: string
+        required: true
+      prefix:
+        description: Prefix for the cmake parameter
+        type: string
+        required: true
+      config:
+        description: Params for the SYCL configuration
+        type: string
+        required: true
+      unit:
+        description: Test unit (cpu/gpu)
+        type: string
+        required: true
+      runner_tag:
+        description: Tag defined for the runner
+        type: string
+        required: true
+      xfail:
+        description: Tests expected to fail (exported as LIT_XFAIL)
+        type: string
+        required: false
+      xfail_not:
+        description: Tests excluded from xfail (exported as LIT_XFAIL_NOT)
+        type: string
+        required: false
+      filter_out:
+        description: Tests to filter out completely
+        type: string
+        required: false
+      extra_lit_flags:
+        description: Additional llvm-lit flags to use
+        type: string
+        required: false
+
+permissions:
+  contents: read
+  pull-requests: write
+
+jobs:
+  changed-files:
+    if: false
+    name: Check for changed files
+    runs-on: ${{ github.repository_owner == 'oneapi-src' && 'intel-ubuntu-22.04' || 'ubuntu-latest' }}
+    outputs:
+      any_changed: ${{ steps.get-changed.outputs.any_changed }}
+    steps:
+    - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
+    - name: Get changed files
+      id: get-changed
+      uses: tj-actions/changed-files@d6babd6899969df1a11d14c368283ea4436bca78 # v44.5.2
+      with:
+        files: |
+          source/adapters/${{inputs.str_name}}/**
+          source/loader/**
+          .github/workflows/e2e*
+
+  e2e-build-hw:
+    # We want to run the job only if there are changes in the specific adapter
+    if: needs.changed-files.outputs.any_changed == 'true'
+    name: Build SYCL, UR, run E2E
+    needs: changed-files
+    permissions:
+      contents: read
+      pull-requests: write
+
+    # Allow failures, since SYCL tests and API may not be stable
+    continue-on-error: true
+    strategy:
+      matrix:
+        adapter: [
+          {name: "${{inputs.name}}",
+           str_name: "${{inputs.str_name}}",
+           prefix: "${{inputs.prefix}}",
+           config: "${{inputs.config}}",
+           unit: "${{inputs.unit}}",
+           extra_lit_flags: "${{inputs.extra_lit_flags}}"},
+        ]
+        build_type: [Release]
+        compiler: [{c: clang, cxx: clang++}]
+
+    runs-on: ${{inputs.runner_tag}}
+
+    steps:
+    # Workspace on self-hosted runners is not cleaned automatically.
+    # We have to delete the files created outside of using actions.
+    - name: Cleanup self-hosted workspace
+      if: always()
+      run: |
+        ls -la ./
+        rm -rf ./* || true
+
+    - name: Checkout UR
+      uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
+      with:
+        path: ur-repo
+
+    - name: Checkout SYCL
+      uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
+      with:
+        repository: intel/llvm
+        ref: refs/heads/sycl
+        path: sycl-repo
+
+    - name: Set CUDA env vars
+      if: matrix.adapter.name == 'CUDA'
+      run: |
+        echo "CUDA_LIB_PATH=/usr/local/cuda/lib64/stubs" >> $GITHUB_ENV
+        echo "LD_LIBRARY_PATH=/usr/local/cuda/compat/:/usr/local/cuda/lib64:$LD_LIBRARY_PATH" >> $GITHUB_ENV
+
+    - name: Configure SYCL
+      run: >
+        python3 sycl-repo/buildbot/configure.py
+        -t ${{matrix.build_type}}
+        -o ${{github.workspace}}/sycl_build
+        --cmake-gen "Ninja"
+        --ci-defaults ${{matrix.adapter.config}}
+        --cmake-opt="-DLLVM_INSTALL_UTILS=ON"
+        --cmake-opt="-DSYCL_PI_TESTS=OFF"
+        --cmake-opt="-DSYCL_UR_USE_FETCH_CONTENT=OFF"
+        --cmake-opt="-DSYCL_UR_SOURCE_DIR=${{github.workspace}}/ur-repo/"
+        --cmake-opt=-DCMAKE_C_COMPILER_LAUNCHER=ccache
+        --cmake-opt=-DCMAKE_CXX_COMPILER_LAUNCHER=ccache
+
+    - name: Build SYCL
+      run: cmake --build ${{github.workspace}}/sycl_build -j
+
+    - name: Set extra llvm-lit options
+      if: matrix.adapter.extra_lit_flags != ''
+      run: echo "LIT_OPTS=${{matrix.adapter.extra_lit_flags}}" >> $GITHUB_ENV
+
+    - name: Run check-sycl
+      # Remove after fixing SYCL test :: abi/layout_handler.cpp
+      # This issue does not affect further execution of e2e with UR.
+      continue-on-error: true
+      run: cmake --build ${{github.workspace}}/sycl_build --target check-sycl
+
+    - name: Set additional env vars
+      run: |
+        echo "${{github.workspace}}/sycl_build/bin" >> $GITHUB_PATH
+        echo "LD_LIBRARY_PATH=${{github.workspace}}/sycl_build/lib:$LD_LIBRARY_PATH" >> $GITHUB_ENV
+
+    # Running (newly built) sycl-ls sets up some extra variables
+    - name: Setup SYCL variables
+      run: |
+        which clang++ sycl-ls
+        SYCL_UR_TRACE=-1 sycl-ls
+
+    - name: Build e2e tests
+      run: >
+        cmake
+        -GNinja
+        -B ${{github.workspace}}/build-e2e/
+        -S ${{github.workspace}}/sycl-repo/sycl/test-e2e/
+        -DSYCL_TEST_E2E_TARGETS="${{matrix.adapter.prefix}}${{matrix.adapter.str_name}}:${{matrix.adapter.unit}}"
+        -DCMAKE_CXX_COMPILER="$(which clang++)"
+        -DLLVM_LIT="${{github.workspace}}/sycl-repo/llvm/utils/lit/lit.py"
+
+    - name: Set LIT_XFAIL
+      if: inputs.xfail != ''
+      run: echo "LIT_XFAIL=${{inputs.xfail}}" >> $GITHUB_ENV
+
+    - name: Set LIT_FILTER_OUT
+      if: inputs.filter_out != ''
+      run: echo "LIT_FILTER_OUT=${{inputs.filter_out}}" >> $GITHUB_ENV
+
+    - name: Set LIT_XFAIL_NOT
+      if: inputs.xfail_not != ''
+      run: echo "LIT_XFAIL_NOT=${{inputs.xfail_not}}" >> $GITHUB_ENV
+
+    # TODO: remove once intel/llvm lit tests can properly recognize the GPU
+    - name: Configure hardware platform feature for L0
+      if: matrix.adapter.name == 'L0'
+      run: |
+        sed -i '/import lit.llvm/i config.available_features.add("gpu-intel-pvc-1T")' build-e2e/lit.site.cfg.py
+        sed -i '/import lit.llvm/i config.available_features.add("gpu-intel-pvc")' build-e2e/lit.site.cfg.py
+
+    - name: Run e2e tests
+      id: tests
+      run: ninja -C build-e2e check-sycl-e2e || echo "e2e tests have failed. Ignoring failure."
+
+    - name: Get information about platform
+      if: ${{ always() }}
+      working-directory: ${{github.workspace}}/ur-repo
+      run: .github/scripts/get_system_info.sh
+
+    # FIXME: Requires pull-requests: write permission, but this is only granted
+    # on pull requests from forks when using the pull_request_target workflow
+    # trigger, not the pull_request trigger.
+    # - name: Add comment to PR
+    #   uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
+    #   if: ${{ always() }}
+    #   with:
+    #     script: |
+    #       const adapter = '${{ matrix.adapter.name }}';
+    #       const url = '${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}';
+    #       const test_status = '${{ steps.tests.outcome }}';
+    #       const job_status = '${{ job.status }}';
+    #       const body = `E2E ${adapter} build:\n${url}\nJob status: ${job_status}. Test status: ${test_status}`;
+
+    #       github.rest.issues.createComment({
+    #         issue_number: context.issue.number,
+    #         owner: context.repo.owner,
+    #         repo: context.repo.repo,
+    #         body: body
+    #       })
diff --git a/.github/workflows/e2e_cuda.yml b/.github/workflows/e2e_cuda.yml
new file mode 100644
index 0000000000..c2f1d969b8
--- /dev/null
+++ b/.github/workflows/e2e_cuda.yml
@@ -0,0 +1,24 @@
+name: E2E Cuda
+
+on:
+  workflow_call:
+
+permissions:
+  contents: read
+  pull-requests: write
+
+jobs:
+  e2e-build-hw:
+    if: github.repository == 'oneapi-src/unified-runtime' # run only on upstream; forks will not have the HW
+    name: Start e2e job
+    # use the core flow, run it with CUDA-specific parameters
+    uses: ./.github/workflows/e2e_core.yml
+    with:
+      name: "CUDA"
+      runner_tag: "CUDA_E2E"
+      str_name: "cuda"
+      prefix: "ext_oneapi_"
+      config: "--cuda"
+      unit: "gpu"
+      extra_lit_flags: "-sv --max-time=3600"
+      xfail: "Regression/device_num.cpp"
diff --git a/.github/workflows/e2e_level_zero.yml b/.github/workflows/e2e_level_zero.yml
new file mode 100644
index 0000000000..1fd814f271
--- /dev/null
+++ b/.github/workflows/e2e_level_zero.yml
@@ -0,0 +1,31 @@
+name: E2E Level Zero
+
+on:
+  workflow_call:
+
+permissions:
+  contents: read
+  pull-requests: write
+
+jobs:
+  e2e-build-hw:
+    if: github.repository == 'oneapi-src/unified-runtime' # run only on upstream; forks will not have the HW
+    name: Start e2e job
+    # use the core flow, run it with L0-specific parameters
+    uses: ./.github/workflows/e2e_core.yml
+    with:
+      name: "L0"
+      runner_tag: "L0_E2E"
+      str_name: "level_zero"
+      prefix: "ext_oneapi_"
+      config: ""
+      unit: "gpu"
+      # Failing tests
+      xfail: "InvokeSimd/Regression/call_vadd_1d_spill.cpp;InvokeSimd/Regression/ImplicitSubgroup/call_vadd_1d_spill.cpp;ESIMD/mask_expand_load.cpp;Matrix/joint_matrix_prefetch.cpp;Matrix/SPVCooperativeMatrix/joint_matrix_prefetch.cpp;Matrix/joint_matrix_bf16_fill_k_cache_prefetch.cpp;Matrix/SPVCooperativeMatrix/element_wise_ops.cpp;"
+      # Unexpectedly Passed Tests
+      xfail_not: ""
+      # Flaky tests
+      filter_out: "Basic/accessor/accessor.cpp|DeviceArchitecture/device_architecture_comparison_on_device_aot.cpp|Graph/Explicit/interop-level-zero-launch-kernel.cpp|Graph/RecordReplay/interop-level-zero-launch-kernel.cpp|syclcompat/launch/launch_policy_lmem.cpp"
+      # These runners by default spawn upwards of 260 workers.
+      # We also add a timeout in case some test hangs.
+      extra_lit_flags: "--param gpu-intel-pvc=True --param gpu-intel-pvc-1T=True -sv -j 100 --max-time=3600"
diff --git a/.github/workflows/e2e_opencl.yml b/.github/workflows/e2e_opencl.yml
new file mode 100644
index 0000000000..e4714b2434
--- /dev/null
+++ b/.github/workflows/e2e_opencl.yml
@@ -0,0 +1,24 @@
+name: E2E OpenCL
+
+on:
+  workflow_call:
+
+permissions:
+  contents: read
+  pull-requests: write
+
+jobs:
+  e2e-build-hw:
+    if: github.repository == 'oneapi-src/unified-runtime' # run only on upstream; forks will not have the HW
+    name: Start e2e job
+    # use the core flow, run it with OpenCL-specific parameters
+    uses: ./.github/workflows/e2e_core.yml
+    with:
+      name: "OPENCL"
+      runner_tag: "OPENCL"
+      str_name: "opencl"
+      prefix: ""
+      config: ""
+      unit: "cpu"
+      xfail: "AOT/double.cpp;AOT/half.cpp;AOT/reqd-sg-size.cpp;Basic/built-ins/marray_geometric.cpp;KernelCompiler/kernel_compiler_spirv.cpp;KernelCompiler/opencl_queries.cpp;NonUniformGroups/ballot_group.cpp;NonUniformGroups/ballot_group_algorithms.cpp;NonUniformGroups/fixed_size_group_algorithms.cpp;NonUniformGroups/opportunistic_group.cpp;NonUniformGroups/opportunistic_group_algorithms.cpp;NonUniformGroups/tangle_group.cpp;NonUniformGroups/tangle_group_algorithms.cpp"
+      extra_lit_flags: "-sv --max-time=3600"
diff --git a/.github/workflows/multi_device.yml b/.github/workflows/multi_device.yml
index 5334e86b87..2dd5d60352 100644
--- a/.github/workflows/multi_device.yml
+++ b/.github/workflows/multi_device.yml
@@ -13,7 +13,7 @@ permissions:
 jobs:
   examples:
     name: Multi Device testing
-    if: github.repository == 'oneapi-src/unified-runtime' # run only on upstream; forks won't have the HW
+    if: false
     strategy:
       matrix:
         adapter: [
diff --git a/.github/workflows/source-checks.yml b/.github/workflows/source-checks.yml
new file mode 100644
index 0000000000..8c43adf952
--- /dev/null
+++ b/.github/workflows/source-checks.yml
@@ -0,0 +1,66 @@
+on:
+  workflow_call:
+
+permissions:
+  contents: read
+
+jobs:
+  source-checks:
+    if: false
+    strategy:
+      matrix:
+        os: ['ubuntu-22.04', 'windows-2022']
+
+    runs-on: ${{matrix.os}}
+
+    steps:
+    - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
+
+    - uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0
+      with:
+        python-version: 3.9
+
+    - name: Install pip packages
+      run: pip install -r third_party/requirements.txt
+
+    - name: "[Lin] Install doxygen"
+      if: matrix.os == 'ubuntu-22.04'
+      run: |
+        sudo apt-get update
+        sudo apt-get install -y doxygen
+
+    - name: "[Win] Install doxygen"
+      if: matrix.os == 'windows-2022'
+      run: |
+        $WorkingDir = $PWD.Path
+        Invoke-WebRequest -Uri https://github.com/doxygen/doxygen/releases/download/Release_1_9_8/doxygen-1.9.8.windows.x64.bin.zip -OutFile "$WorkingDir\doxygen.zip"
+        Expand-Archive -Path "$WorkingDir\doxygen.zip"
+        Add-Content $env:GITHUB_PATH "$WorkingDir\doxygen"
+
+    - name: "[Lin] Install hwloc"
+      if: matrix.os == 'ubuntu-22.04'
+      run: .github/scripts/install_hwloc.sh
+
+    - name: "[Win] Install hwloc"
+      if: matrix.os == 'windows-2022'
+      run: vcpkg install hwloc:x64-windows
+
+    - name: Configure CMake
+      env:
+        VCPKG_PATH: "C:/vcpkg/packages/hwloc_x64-windows"
+      run: >
+        cmake
+        -B${{github.workspace}}/build
+        -DCMAKE_PREFIX_PATH="${{env.VCPKG_PATH}}"
+        -DUR_ENABLE_TRACING=OFF
+        -DCMAKE_BUILD_TYPE=Debug
+        -DUR_BUILD_TESTS=OFF
+        -DUR_FORMAT_CPP_STYLE=ON
+
+    # Verifying licenses on a single OS is enough
+    - name: Verify that each source file contains a license
+      if: matrix.os == 'ubuntu-22.04'
+      run: cmake --build ${{github.workspace}}/build --target verify-licenses
+
+    - name: Generate source from spec, check for uncommitted diff
+      run: cmake --build ${{github.workspace}}/build --target check-generated
diff --git a/.github/workflows/trivy.yml b/.github/workflows/trivy.yml
index c2ef1d47e7..ba7271180f 100644
--- a/.github/workflows/trivy.yml
+++ b/.github/workflows/trivy.yml
@@ -18,6 +18,7 @@ permissions:
 jobs:
   linux:
+    if: false
     name: Trivy
     runs-on: ${{ github.repository_owner == 'oneapi-src' && 'intel-ubuntu-22.04' || 'ubuntu-latest' }}
     permissions:
diff --git a/.github/workflows/unified-runtime.yml b/.github/workflows/unified-runtime.yml
new file mode 100644
index 0000000000..a454d77fb6
--- /dev/null
+++ b/.github/workflows/unified-runtime.yml
@@ -0,0 +1,18 @@
+name: Unified Runtime
+
+on: [push, pull_request]
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
+  cancel-in-progress: true
+
+permissions:
+  contents: read
+
+jobs:
+  cuda:
+    name: CUDA
+    uses: ./.github/workflows/build-hw-reusable.yml
+    with:
+      adapter_name: CUDA
+      runner_name: CUDA
diff --git a/test/adapters/cuda/context_tests.cpp b/test/adapters/cuda/context_tests.cpp
index 77d0b42cd0..4f4df4683c 100644
--- a/test/adapters/cuda/context_tests.cpp
+++ b/test/adapters/cuda/context_tests.cpp
@@ -43,37 +43,6 @@ TEST_P(cudaUrContextCreateTest, CreateWithChildThread) {
   callContextFromOtherThread.join();
 }
 
-TEST_P(cudaUrContextCreateTest, ActiveContext) {
-  uur::raii::Context context = nullptr;
-  ASSERT_SUCCESS(urContextCreate(1, &device, nullptr, context.ptr()));
-  ASSERT_NE(context, nullptr);
-
-  uur::raii::Queue queue = nullptr;
-  ur_queue_properties_t queue_props{UR_STRUCTURE_TYPE_QUEUE_PROPERTIES, nullptr,
-                                    0};
-  ASSERT_SUCCESS(urQueueCreate(context, device, &queue_props, queue.ptr()));
-  ASSERT_NE(queue, nullptr);
-
-  // check that the queue has the correct context
-  ASSERT_EQ(context, queue->getContext());
-
-  // create a buffer
-  uur::raii::Mem buffer = nullptr;
-  ASSERT_SUCCESS(urMemBufferCreate(context, UR_MEM_FLAG_READ_WRITE, 1024,
-                                   nullptr, buffer.ptr()));
-  ASSERT_NE(buffer, nullptr);
-
-  // check that the context is now the active CUDA context
-  CUcontext cudaCtx = nullptr;
-  ASSERT_SUCCESS_CUDA(cuCtxGetCurrent(&cudaCtx));
-  ASSERT_NE(cudaCtx, nullptr);
-
-  ur_native_handle_t native_context = 0;
-  ASSERT_SUCCESS(urContextGetNativeHandle(context, &native_context));
-  ASSERT_NE(reinterpret_cast<CUcontext>(native_context), nullptr);
-  ASSERT_EQ(cudaCtx, reinterpret_cast<CUcontext>(native_context));
-}
-
 TEST_P(cudaUrContextCreateTest, ContextLifetimeExisting) {
   // start by setting up a CUDA context on the thread
   CUcontext original;
diff --git a/test/adapters/cuda/memory_tests.cpp b/test/adapters/cuda/memory_tests.cpp
index 6839b0b95f..ac4bfefdd7 100644
--- a/test/adapters/cuda/memory_tests.cpp
+++ b/test/adapters/cuda/memory_tests.cpp
@@ -14,11 +14,12 @@ TEST_P(cudaMemoryTest, urMemBufferNoActiveContext) {
   constexpr size_t memSize = 1024u;
 
   CUcontext current = nullptr;
-  do {
+  ASSERT_SUCCESS_CUDA(cuCtxGetCurrent(&current));
+  while (current != nullptr) {
     CUcontext oldContext = nullptr;
     ASSERT_SUCCESS_CUDA(cuCtxPopCurrent(&oldContext));
     ASSERT_SUCCESS_CUDA(cuCtxGetCurrent(&current));
-  } while (current != nullptr);
+  }
 
   uur::raii::Mem mem;
   ASSERT_SUCCESS(urMemBufferCreate(context, UR_MEM_FLAG_READ_WRITE, memSize,
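Side note on the memory_tests.cpp hunk above: the old do/while popped a context before ever checking whether one was bound, while the new loop queries first, so cuCtxPopCurrent is never called on an empty context stack. A minimal standalone sketch of the same draining pattern follows; drainThreadContexts is a hypothetical helper written for illustration, not part of the patch, and it assumes the CUDA driver API has already been initialized.

#include <cassert>
#include <cuda.h>

// Pop every context bound to the calling thread. Checking before popping
// means cuCtxPopCurrent is never invoked with an empty context stack.
static void drainThreadContexts() {
  CUcontext current = nullptr;
  assert(cuCtxGetCurrent(&current) == CUDA_SUCCESS);
  while (current != nullptr) {
    CUcontext popped = nullptr;
    assert(cuCtxPopCurrent(&popped) == CUDA_SUCCESS);
    assert(cuCtxGetCurrent(&current) == CUDA_SUCCESS);
  }
}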
diff --git a/test/conformance/enqueue/helpers.h b/test/conformance/enqueue/helpers.h
index a4d127c4e5..fd8c428a52 100644
--- a/test/conformance/enqueue/helpers.h
+++ b/test/conformance/enqueue/helpers.h
@@ -203,7 +203,15 @@ struct urMultiQueueMultiDeviceTestWithParam
         urContextCreate(devices.size(), devices.data(), nullptr, &context));
 
     // Duplicate our devices until we hit the minimum size specified.
-    auto srcDevices = devices;
+    std::vector<ur_device_handle_t> srcDevices;
+    // If the test actually only wants one device duplicated a bunch of times
+    // we take devices[0] and discard any other devices that were discovered.
+    if (trueMultiDevice) {
+      srcDevices = devices;
+    } else {
+      srcDevices.push_back(devices[0]);
+      devices.clear();
+    }
     while (devices.size() < minDevices) {
       devices.insert(devices.end(), srcDevices.begin(), srcDevices.end());
     }
@@ -224,6 +232,7 @@ struct urMultiQueueMultiDeviceTestWithParam
 
   ur_context_handle_t context;
   std::vector<ur_queue_handle_t> queues;
+  bool trueMultiDevice = true;
 };
 
 } // namespace uur
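Side note on the helpers.h hunk above: the fixture now either round-robins over every discovered device or repeats devices[0] until minDevices entries exist. Below is a standalone restatement of that duplication logic, for review purposes only; expandDevices is a hypothetical free function, not part of the patch.

#include <cstddef>
#include <vector>

// Mirrors the fixture's loop: duplicate either the whole discovered set or
// just the first device until at least minDevices entries exist.
template <typename Device>
std::vector<Device> expandDevices(std::vector<Device> devices,
                                  std::size_t minDevices,
                                  bool trueMultiDevice) {
  std::vector<Device> srcDevices;
  if (trueMultiDevice) {
    srcDevices = devices; // use every real device
  } else {
    srcDevices.push_back(devices[0]); // one real device, reused
    devices.clear();
  }
  while (devices.size() < minDevices) {
    devices.insert(devices.end(), srcDevices.begin(), srcDevices.end());
  }
  return devices;
}

For example, one device with minDevices = 4 and trueMultiDevice = false yields four copies of that device, which is what the single-device increment tests below rely on.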
diff --git a/test/conformance/enqueue/urEnqueueKernelLaunch.cpp b/test/conformance/enqueue/urEnqueueKernelLaunch.cpp
index ef5c0228ed..3dd977d556 100644
--- a/test/conformance/enqueue/urEnqueueKernelLaunch.cpp
+++ b/test/conformance/enqueue/urEnqueueKernelLaunch.cpp
@@ -565,7 +565,7 @@ UUR_INSTANTIATE_PLATFORM_TEST_SUITE(urEnqueueKernelLaunchMultiDeviceTest);
 // TODO: rewrite this test, right now it only works for a single queue
 // (the context is only created for one device)
 TEST_P(urEnqueueKernelLaunchMultiDeviceTest, KernelLaunchReadDifferentQueues) {
-  UUR_KNOWN_FAILURE_ON(uur::LevelZero{}, uur::LevelZeroV2{});
+  UUR_KNOWN_FAILURE_ON(uur::CUDA{}, uur::LevelZero{}, uur::LevelZeroV2{});
 
   uur::KernelLaunchHelper helper =
       uur::KernelLaunchHelper{platform, context, kernel, queues[0]};
diff --git a/test/conformance/enqueue/urEnqueueKernelLaunchAndMemcpyInOrder.cpp b/test/conformance/enqueue/urEnqueueKernelLaunchAndMemcpyInOrder.cpp
index c97471aee8..1102892d15 100644
--- a/test/conformance/enqueue/urEnqueueKernelLaunchAndMemcpyInOrder.cpp
+++ b/test/conformance/enqueue/urEnqueueKernelLaunchAndMemcpyInOrder.cpp
@@ -155,13 +155,14 @@ struct urEnqueueKernelLaunchIncrementTest
 
   using Param = uur::BoolTestParam;
 
-  using urMultiQueueLaunchMemcpyTest<Param>::context;
   using urMultiQueueLaunchMemcpyTest<Param>::queues;
-  using urMultiQueueLaunchMemcpyTest<Param>::devices;
   using urMultiQueueLaunchMemcpyTest<Param>::kernels;
   using urMultiQueueLaunchMemcpyTest<Param>::SharedMem;
 
   void SetUp() override {
+    // We actually need a single device used multiple times for this test, as
+    // opposed to utilizing all available devices for the platform.
+    this->trueMultiDevice = false;
     UUR_RETURN_ON_FATAL_FAILURE(
         urMultiQueueLaunchMemcpyTest<Param>::
             SetUp()); // Use single device, duplicated numOps times
@@ -344,9 +345,28 @@ TEST_P(urEnqueueKernelLaunchIncrementMultiDeviceTest, Success) {
   }
 }
 
-using urEnqueueKernelLaunchIncrementMultiDeviceMultiThreadTest =
-    urEnqueueKernelLaunchIncrementMultiDeviceTestWithParam<
-        std::tuple<uur::BoolTestParam, uur::BoolTestParam>>;
+struct urEnqueueKernelLaunchIncrementMultiDeviceMultiThreadTest
+    : urEnqueueKernelLaunchIncrementMultiDeviceTestWithParam<
+          std::tuple<uur::BoolTestParam, uur::BoolTestParam>> {
+  using Param = std::tuple<uur::BoolTestParam, uur::BoolTestParam>;
+
+  using urEnqueueKernelLaunchIncrementMultiDeviceTestWithParam<Param>::devices;
+  using urEnqueueKernelLaunchIncrementMultiDeviceTestWithParam<Param>::queues;
+  using urEnqueueKernelLaunchIncrementMultiDeviceTestWithParam<Param>::kernels;
+  using urEnqueueKernelLaunchIncrementMultiDeviceTestWithParam<
+      Param>::SharedMem;
+
+  void SetUp() override {
+    useEvents = std::get<0>(getParam()).value;
+    queuePerThread = std::get<1>(getParam()).value;
+    // With !queuePerThread this becomes a test on a single device
+    this->trueMultiDevice = queuePerThread;
+    urEnqueueKernelLaunchIncrementMultiDeviceTestWithParam<Param>::SetUp();
+  }
+
+  bool useEvents;
+  bool queuePerThread;
+};
 
 UUR_PLATFORM_TEST_SUITE_WITH_PARAM(
     urEnqueueKernelLaunchIncrementMultiDeviceMultiThreadTest,
@@ -356,11 +376,7 @@ UUR_PLATFORM_TEST_SUITE_WITH_PARAM(
     printParams);
 
 // Enqueue kernelLaunch concurrently from multiple threads
-// With !queuePerThread this becomes a test on a single device
 TEST_P(urEnqueueKernelLaunchIncrementMultiDeviceMultiThreadTest, Success) {
-  auto useEvents = std::get<0>(getParam()).value;
-  auto queuePerThread = std::get<1>(getParam()).value;
-
   if (!queuePerThread) {
     UUR_KNOWN_FAILURE_ON(uur::LevelZero{}, uur::LevelZeroV2{});
   }
@@ -371,11 +387,11 @@ TEST_P(urEnqueueKernelLaunchIncrementMultiDeviceMultiThreadTest, Success) {
   static constexpr size_t numOpsPerThread = 6;
 
   for (size_t i = 0; i < numThreads; i++) {
-    threads.emplace_back([this, i, queuePerThread, useEvents]() {
+    threads.emplace_back([this, i]() {
       constexpr size_t global_offset = 0;
       constexpr size_t n_dimensions = 1;
 
-      auto queue = queuePerThread ? queues[i] : queues.back();
+      auto queue = this->queuePerThread ? queues[i] : queues.back();
       auto kernel = kernels[i];
       auto sharedPtr = SharedMem[i];
 
@@ -385,7 +401,7 @@ TEST_P(urEnqueueKernelLaunchIncrementMultiDeviceMultiThreadTest, Success) {
       ur_event_handle_t *lastEvent = nullptr;
       ur_event_handle_t *signalEvent = nullptr;
 
-      if (useEvents) {
+      if (this->useEvents) {
         waitNum = j > 0 ? 1 : 0;
         lastEvent = j > 0 ? Events[j - 1].ptr() : nullptr;
         signalEvent = Events[j].ptr();
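Side note on the final hunks: useEvents and queuePerThread move from locals in the test body to fixture members initialized in SetUp(), so each worker lambda captures only this and the loop index instead of a growing list of locals. A minimal sketch of that capture pattern follows; Fixture is a hypothetical stand-in for the test class and is not part of the patch.

#include <cstddef>
#include <thread>
#include <vector>

struct Fixture {
  bool useEvents = false;
  bool queuePerThread = false;

  void run(std::size_t numThreads) {
    std::vector<std::thread> threads;
    for (std::size_t i = 0; i < numThreads; i++) {
      // Capturing `this` exposes all members, so the capture list no longer
      // needs updating every time a parameter moves into the fixture.
      threads.emplace_back([this, i]() {
        std::size_t queueIndex = this->queuePerThread ? i : 0;
        (void)queueIndex; // a real test would pick queues[queueIndex]
        if (this->useEvents) {
          // wire up wait/signal events here
        }
      });
    }
    for (auto &t : threads) {
      t.join();
    }
  }
};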