7
7
#include " utils/array_utils.h"
8
8
#include < array>
9
9
#include < cstdio>
10
+ #include < future>
10
11
11
12
using namespace gpu ;
12
13
@@ -21,9 +22,12 @@ template <size_t N> std::array<float, N> makeData() {
21
22
}
22
23
23
24
template <size_t N, size_t R = N, size_t C = 1 > void showResult (Context &ctx, Kernel &op, Tensor &output) {
24
- DispatchKernel (ctx, op);
25
+
26
+ std::promise<void > promise;
27
+ std::future<void > future = promise.get_future ();
28
+ DispatchKernel (ctx, op, promise);
25
29
std::array<float , R * C> outputArr;
26
- Wait (ctx, op. future );
30
+ Wait (ctx, future);
27
31
ToCPU (ctx, output, outputArr.data (), sizeof (outputArr));
28
32
printf (" %s" , show<float , R, C>(outputArr, " output" ).c_str ());
29
33
}
@@ -48,7 +52,7 @@ void puzzle1(Context &ctx) {
48
52
printf (" \n\n Puzzle 1\n\n " );
49
53
Tensor input = CreateTensor (ctx, {N}, kf32, makeData<N>().data ());
50
54
Tensor output = CreateTensor (ctx, {N}, kf32);
51
- Kernel op = CreateKernel (ctx, CreateShader (kPuzzle1 , N), input, output,
55
+ Kernel op = CreateKernel (ctx, CreateShader (kPuzzle1 , N), TensorList{ input, output} ,
52
56
/* nthreads*/ {N, 1 , 1 });
53
57
showResult<N>(ctx, op, output);
54
58
}
@@ -75,8 +79,8 @@ void puzzle2(Context &ctx) {
75
79
Tensor a = CreateTensor (ctx, {N}, kf32, makeData<N>().data ());
76
80
Tensor b = CreateTensor (ctx, {N}, kf32, makeData<N>().data ());
77
81
Tensor output = CreateTensor (ctx, {N}, kf32);
78
- Kernel op = CreateKernel (ctx, CreateShader (kPuzzle2 , 256 ), Tensors {a, b},
79
- output, {N, 1 , 1 });
82
+ Kernel op = CreateKernel (ctx, CreateShader (kPuzzle2 , 256 ), TensorList {a, b, output },
83
+ {N, 1 , 1 });
80
84
showResult<N>(ctx, op, output);
81
85
}
82
86
@@ -101,7 +105,7 @@ void puzzle3(Context &ctx) {
101
105
Tensor input = CreateTensor (ctx, {N}, kf32, makeData<N>().data ());
102
106
Tensor output = CreateTensor (ctx, {N}, kf32);
103
107
Kernel op =
104
- CreateKernel (ctx, CreateShader (kPuzzle3 , 4 ), input, output, {N, 1 , 1 });
108
+ CreateKernel (ctx, CreateShader (kPuzzle3 , 4 ), TensorList{ input, output} , {N, 1 , 1 });
105
109
showResult<N>(ctx, op, output);
106
110
}
107
111
@@ -135,7 +139,7 @@ void puzzle4(Context &ctx) {
135
139
};
136
140
Kernel op =
137
141
CreateKernel (ctx, CreateShader (kPuzzle4 , /* workgroup size*/ {N, N, 1 }),
138
- input, output, {N, N, 1 }, Params{N});
142
+ TensorList{ input, output} , {N, N, 1 }, Params{N});
139
143
showResult<N, N, N>(ctx, op, output);
140
144
}
141
145
0 commit comments