Skip to content

Commit d8cf20c

Browse files
Added Sycl Graph samples
1 parent d709cdf commit d8cf20c

11 files changed

+777
-0
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
# ==============================================================
2+
# Copyright © 2025 Codeplay Software
3+
#
4+
# SPDX-License-Identifier: MIT
5+
# =============================================================
6+
7+
cmake_minimum_required(VERSION 3.12)
8+
project(SYCL-Graph-Samples)
9+
10+
# Set global flags
11+
set(CMAKE_CXX_STANDARD 17)
12+
13+
# Configure SYCL
14+
include(cmake/ConfigureSYCL.cmake)
15+
16+
# Output directory for executables
17+
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR} CACHE PATH "" FORCE)
18+
19+
# Find all .cpp files in the Samples/ directory
20+
file(GLOB SAMPLE_SOURCES "${CMAKE_SOURCE_DIR}/Samples/*.cpp")
21+
22+
# Add executable for each .cpp file
23+
foreach(SOURCE_FILE ${SAMPLE_SOURCES})
24+
# Extract the file name without the extension
25+
get_filename_component(EXE_NAME ${SOURCE_FILE} NAME_WE)
26+
27+
# Create executable
28+
add_executable(${EXE_NAME} ${SOURCE_FILE})
29+
30+
# Add SYCL flags
31+
target_compile_options(${EXE_NAME} PUBLIC ${SYCL_FLAGS})
32+
target_link_options(${EXE_NAME} PUBLIC ${SYCL_FLAGS})
33+
endforeach()
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
# SYCL-Graph Samples
2+
3+
Code examples demonstrating the usage of [`sycl_ext_oneapi_graph`](https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc) extension.
4+
5+
| **Example** | **Description** |
6+
| --------------- | --------------- |
7+
| [Dot Product](Samples/dotProduct.cpp) | This example uses the explicit graph creation API to perform a dot product operation. |
8+
| [Diamond Dependency](Samples/diamondDependency.cpp) | This code example shows how a SYCL queue can be put into a recording state, which allows a `command_graph` object to be populated by the command-groups submitted to the queue. Once the graph is complete, recording finishes on the queue to put it back into the default executing state. The graph is then finalized so that no more nodes can be added. Lastly, the graph is submitted in its entirety for execution via `handler::ext_oneapi_graph(command_graph<graph_state::executable>)`. |
9+
| [Dynamic Parameter Update](Samples/dynamicParamUpdateUSM.cpp) | An example showing a graph with a single kernel node that is created using a free function kernel with `handler::set_args()` and having its node arguments updated. Additionally, [dynamicParamUpdateBuffers.cpp](Samples/dynamicParamUpdateBuffers.cpp) demonstrates using this feature with buffers and accessors. |
10+
| [Dynamic Command Groups](Samples/dynamicCG.cpp) | Example showing how a graph with a dynamic command group node can be updated.|
11+
| [Dynamic Command Groups With Dynamic Parameters](Samples/dynamicCG_with_Params.cpp) | Example showing how a graph with a dynamic command group that uses dynamic parameters in a node can be updated.|
12+
| [Whole Graph Update](Samples/whole_graph_update.cpp) | Example that shows recording and updating several nodes with different parameters using whole-graph update.|
13+
14+
## Dependencies
15+
The CMake configuration assumes usage of the DPC++ compiler. Both the [Intel DPC++ release](https://www.intel.com/content/www/us/en/developer/tools/oneapi/dpc-compiler.html) and the [open source version](https://github.com/intel/llvm) are compatible.
16+
17+
## Building
18+
The project uses a standard CMake build configuration system. Ensure the SYCL compiler is used by the configuration either by setting the environment variable `CXX=<compiler>` or passing the configuration flag
19+
`-DCMAKE_CXX_COMPILER=<compiler>` where `<compiler>` is your SYCL compiler's
20+
executable (for example Intel `icpx` or LLVM `clang++`).
21+
22+
To check out the repository and build the examples, use simply:
23+
```
24+
mkdir build && cd build
25+
cmake .. -DCMAKE_CXX_COMPILER=<compiler>
26+
cmake --build .
27+
```
28+
The CMake configuration automatically detects the available SYCL backends and
29+
enables the SPIR/CUDA/HIP targets for the device code, including the corresponding
30+
architecture flags. If desired, these auto-configured cmake options may be overridden
31+
with the following ones:
32+
33+
| `<OPTION>` | `<VALUE>` |
34+
| ---------- | ---------- |
35+
| `ENABLE_SPIR` | `ON` or `OFF` |
36+
| `ENABLE_CUDA` | `ON` or `OFF` |
37+
| `ENABLE_HIP` | `ON` or `OFF` |
38+
| `CUDA_COMPUTE_CAPABILITY` | Integer, e.g. `70` meaning capability 7.0 (arch `sm_70`) |
39+
| `HIP_GFX_ARCH` | String, e.g. `gfx1030` |
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
// ==============================================================
2+
// Copyright © 2025 Codeplay Software
3+
//
4+
// SPDX-License-Identifier: MIT
5+
// =============================================================
6+
7+
#pragma once
8+
9+
#include <sycl/sycl.hpp>
10+
11+
#include <iostream>
12+
#include <string>
13+
14+
inline void ensure_full_aspects_support(const sycl::device &dev) {
15+
std::string error_msg;
16+
17+
if (!dev.has(sycl::aspect::ext_oneapi_graph)) {
18+
error_msg += "Error: The device does NOT support ext_oneapi_graph. \n";
19+
}
20+
if (!dev.has(sycl::aspect::ext_oneapi_limited_graph)) {
21+
error_msg +=
22+
"Error: The device does NOT support ext_oneapi_limited_graph. \n";
23+
}
24+
if (!dev.has(sycl::aspect::usm_shared_allocations)) {
25+
error_msg +=
26+
"Error: The device does NOT support usm_shared_allocations. \n";
27+
}
28+
29+
if (!error_msg.empty()) {
30+
std::cerr << error_msg;
31+
std::exit(1);
32+
}
33+
};
34+
35+
inline void ensure_required_aspects_support(const sycl::device &dev) {
36+
std::string error_msg;
37+
38+
if (!dev.has(sycl::aspect::ext_oneapi_limited_graph)) {
39+
error_msg +=
40+
"Error: The device does NOT support ext_oneapi_limited_graph. \n";
41+
}
42+
if (!dev.has(sycl::aspect::usm_shared_allocations)) {
43+
error_msg +=
44+
"Error: The device does NOT support usm_shared_allocations. \n";
45+
}
46+
47+
if (!error_msg.empty()) {
48+
std::cerr << error_msg;
49+
std::exit(1);
50+
}
51+
};
52+
53+
inline void ensure_full_graph_support(const sycl::device &dev) {
54+
if (!dev.has(sycl::aspect::ext_oneapi_graph)) {
55+
std::cerr << "Error: The device does NOT support ext_oneapi_graph.\n";
56+
std::exit(1);
57+
}
58+
};
59+
60+
inline void ensure_graph_support(const sycl::device &dev) {
61+
if (!dev.has(sycl::aspect::ext_oneapi_limited_graph)) {
62+
std::cerr
63+
<< "Error: The device does NOT support ext_oneapi_limited_graph.\n";
64+
std::exit(1);
65+
}
66+
};
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
// ==============================================================
2+
// Copyright © 2025 Codeplay Software
3+
//
4+
// SPDX-License-Identifier: MIT
5+
// =============================================================
6+
7+
#include "common/aspect_queries.hpp"
8+
9+
#include <sycl/sycl.hpp>
10+
11+
namespace sycl_ext = sycl::ext::oneapi::experimental;
12+
using namespace sycl;
13+
14+
int main() {
15+
constexpr size_t Size = 1024;
16+
17+
queue Queue{};
18+
19+
ensure_graph_support(Queue.get_device());
20+
21+
std::vector<int> DataA(Size), DataB(Size), DataC(Size);
22+
23+
// Lifetime of buffers must exceed the lifetime of graphs they are used in.
24+
buffer<int> BufferA{DataA.data(), range<1>{Size}};
25+
BufferA.set_write_back(false);
26+
buffer<int> BufferB{DataB.data(), range<1>{Size}};
27+
BufferB.set_write_back(false);
28+
buffer<int> BufferC{DataC.data(), range<1>{Size}};
29+
BufferC.set_write_back(false);
30+
31+
{
32+
// New object representing graph of command-groups
33+
sycl_ext::command_graph Graph(
34+
Queue.get_context(), Queue.get_device(),
35+
{sycl_ext::property::graph::assume_buffer_outlives_graph{}});
36+
37+
// `Queue` will be put in the recording state where commands are recorded to
38+
// `Graph` rather than submitted for execution immediately.
39+
Graph.begin_recording(Queue);
40+
41+
// Record commands to `Graph` with the following topology.
42+
//
43+
// increment_kernel
44+
// / \
45+
// A->/ A->\
46+
// / \
47+
// add_kernel subtract_kernel
48+
// \ /
49+
// B->\ C->/
50+
// \ /
51+
// decrement_kernel
52+
53+
Queue.submit([&](handler &CGH) {
54+
auto Pdata = BufferA.get_access<access::mode::read_write>(CGH);
55+
CGH.parallel_for<class Increment_kernel>(
56+
range<1>(Size), [=](item<1> Id) { Pdata[Id]++; });
57+
});
58+
59+
Queue.submit([&](handler &CGH) {
60+
auto Pdata1 = BufferA.get_access<access::mode::read>(CGH);
61+
auto Pdata2 = BufferB.get_access<access::mode::read_write>(CGH);
62+
CGH.parallel_for<class Add_kernel>(
63+
range<1>(Size), [=](item<1> Id) { Pdata2[Id] += Pdata1[Id]; });
64+
});
65+
66+
Queue.submit([&](handler &CGH) {
67+
auto Pdata1 = BufferA.get_access<access::mode::read>(CGH);
68+
auto Pdata2 = BufferC.get_access<access::mode::read_write>(CGH);
69+
CGH.parallel_for<class Subtract_kernel>(
70+
range<1>(Size), [=](item<1> Id) { Pdata2[Id] -= Pdata1[Id]; });
71+
});
72+
73+
Queue.submit([&](handler &CGH) {
74+
auto Pdata1 = BufferB.get_access<access::mode::read_write>(CGH);
75+
auto Pdata2 = BufferC.get_access<access::mode::read_write>(CGH);
76+
CGH.parallel_for<class Decrement_kernel>(range<1>(Size), [=](item<1> Id) {
77+
Pdata1[Id]--;
78+
Pdata2[Id]--;
79+
});
80+
});
81+
82+
// `Queue` will be returned to the executing state where commands are
83+
// submitted immediately for extension.
84+
Graph.end_recording();
85+
86+
// Finalize the modifiable graph to create an executable graph that can be
87+
// submitted for execution.
88+
auto Exec_graph = Graph.finalize();
89+
90+
// Execute graph
91+
Queue.submit([&](handler &CGH) { CGH.ext_oneapi_graph(Exec_graph); })
92+
.wait();
93+
}
94+
95+
return 0;
96+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
// ==============================================================
2+
// Copyright © 2025 Codeplay Software
3+
//
4+
// SPDX-License-Identifier: MIT
5+
// =============================================================
6+
7+
#include "common/aspect_queries.hpp"
8+
9+
#include <sycl/sycl.hpp>
10+
11+
namespace sycl_ext = sycl::ext::oneapi::experimental;
12+
using namespace sycl;
13+
14+
int main() {
15+
constexpr size_t Size = 10;
16+
17+
float Alpha = 1.0f;
18+
float Beta = 2.0f;
19+
float Gamma = 3.0f;
20+
21+
queue Queue{};
22+
23+
ensure_required_aspects_support(Queue.get_device());
24+
25+
sycl_ext::command_graph Graph(Queue.get_context(), Queue.get_device());
26+
27+
float *Dotp = malloc_shared<float>(1, Queue);
28+
float *X = malloc_device<float>(Size, Queue);
29+
float *Y = malloc_device<float>(Size, Queue);
30+
float *Z = malloc_device<float>(Size, Queue);
31+
32+
// Add commands to the graph to create the following topology.
33+
//
34+
// i
35+
// / \
36+
// a b
37+
// \ /
38+
// c
39+
40+
// init data on the device
41+
auto Node_i = Graph.add([&](handler &CGH) {
42+
CGH.parallel_for(Size, [=](id<1> Id) {
43+
const size_t i = Id[0];
44+
X[i] = 1.0f;
45+
Y[i] = 3.0f;
46+
Z[i] = 2.0f;
47+
});
48+
});
49+
50+
auto Node_a = Graph.add(
51+
[&](handler &CGH) {
52+
CGH.parallel_for(range<1>{Size}, [=](id<1> Id) {
53+
const size_t i = Id[0];
54+
X[i] = Alpha * X[i] + Beta * Y[i];
55+
});
56+
},
57+
{sycl_ext::property::node::depends_on(Node_i)});
58+
59+
auto Node_b = Graph.add(
60+
[&](handler &CGH) {
61+
CGH.parallel_for(range<1>{Size}, [=](id<1> Id) {
62+
const size_t i = Id[0];
63+
Z[i] = Gamma * Z[i] + Beta * Y[i];
64+
});
65+
},
66+
{sycl_ext::property::node::depends_on(Node_i)});
67+
68+
auto Node_c = Graph.add(
69+
[&](handler &CGH) {
70+
CGH.single_task([=]() {
71+
for (size_t i = 0; i < Size; i++) {
72+
*Dotp += X[i] * Z[i];
73+
}
74+
});
75+
},
76+
{sycl_ext::property::node::depends_on(Node_a, Node_b)});
77+
78+
auto Exec = Graph.finalize();
79+
80+
// use queue shortcut for graph submission
81+
Queue.ext_oneapi_graph(Exec).wait();
82+
83+
// memory can be freed inside or outside the graph
84+
free(X, Queue);
85+
free(Y, Queue);
86+
free(Z, Queue);
87+
free(Dotp, Queue);
88+
89+
return 0;
90+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
// ==============================================================
2+
// Copyright © 2025 Codeplay Software
3+
//
4+
// SPDX-License-Identifier: MIT
5+
// =============================================================
6+
7+
#include "common/aspect_queries.hpp"
8+
9+
#include <sycl/sycl.hpp>
10+
11+
namespace sycl_ext = sycl::ext::oneapi::experimental;
12+
using namespace sycl;
13+
14+
int main() {
15+
constexpr size_t Size = 1024;
16+
17+
queue Queue{};
18+
19+
ensure_full_graph_support(Queue.get_device());
20+
21+
sycl_ext::command_graph Graph{Queue.get_context(), Queue.get_device()};
22+
23+
int *Ptr = malloc_device<int>(Size, Queue);
24+
25+
int PatternA = 42;
26+
auto CGFA = [&](handler &CGH) {
27+
CGH.parallel_for(Size,
28+
[=](item<1> Item) { Ptr[Item.get_id()] = PatternA; });
29+
};
30+
31+
int PatternB = 0xA;
32+
auto CGFB = [&](handler &CGH) {
33+
CGH.parallel_for(Size,
34+
[=](item<1> Item) { Ptr[Item.get_id()] = PatternB; });
35+
};
36+
37+
// Construct a dynamic command-group with CGFA as the active cgf (index 0).
38+
auto DynamicCG = sycl_ext::dynamic_command_group(Graph, {CGFA, CGFB});
39+
40+
// Create a dynamic command-group graph node.
41+
auto DynamicCGNode = Graph.add(DynamicCG);
42+
43+
auto ExecGraph = Graph.finalize(sycl_ext::property::graph::updatable{});
44+
45+
// The graph will execute CGFA.
46+
Queue.ext_oneapi_graph(ExecGraph).wait();
47+
48+
// Sets CgfB as active in the dynamic command-group (index 1).
49+
DynamicCG.set_active_index(1);
50+
51+
// Calls update to update the executable graph node with the changes to
52+
// DynamicCG.
53+
ExecGraph.update(DynamicCGNode);
54+
55+
// The graph will execute CGFB
56+
Queue.ext_oneapi_graph(ExecGraph).wait();
57+
58+
sycl::free(Ptr, Queue);
59+
60+
return 0;
61+
}

0 commit comments

Comments
 (0)