Skip to content

Commit cb77adc

Browse files
committed
Refactor: fold the output tensor into the TensorList argument, remove MultiKernel (for now), and update the examples accordingly
1 parent 3570190 commit cb77adc

File tree

8 files changed

+125
-521
lines changed

8 files changed

+125
-521
lines changed

Makefile

+1-1
Original file line number | Diff line number | Diff line change
@@ -38,7 +38,7 @@ watch-tests: check-entr check-dependencies
3838
all: build
3939
cd examples/gpu_puzzles && make
4040
cd examples/hello_world && make
41-
cd examples/raymarch && make
41+
cd examples/render && make
4242
cd examples/webgpu_intro && make
4343

4444
clean-build:

examples/gpu_puzzles/run.cpp

+11-7
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,7 @@
77
#include "utils/array_utils.h"
88
#include <array>
99
#include <cstdio>
10+
#include <future>
1011

1112
using namespace gpu;
1213

@@ -21,9 +22,12 @@ template <size_t N> std::array<float, N> makeData() {
2122
}
2223

2324
template <size_t N, size_t R = N, size_t C = 1> void showResult(Context &ctx, Kernel &op, Tensor &output) {
24-
DispatchKernel(ctx, op);
25+
26+
std::promise<void> promise;
27+
std::future<void> future = promise.get_future();
28+
DispatchKernel(ctx, op, promise);
2529
std::array<float, R * C> outputArr;
26-
Wait(ctx, op.future);
30+
Wait(ctx, future);
2731
ToCPU(ctx, output, outputArr.data(), sizeof(outputArr));
2832
printf("%s", show<float, R, C>(outputArr, "output").c_str());
2933
}
@@ -48,7 +52,7 @@ void puzzle1(Context &ctx) {
4852
printf("\n\nPuzzle 1\n\n");
4953
Tensor input = CreateTensor(ctx, {N}, kf32, makeData<N>().data());
5054
Tensor output = CreateTensor(ctx, {N}, kf32);
51-
Kernel op = CreateKernel(ctx, CreateShader(kPuzzle1, N), input, output,
55+
Kernel op = CreateKernel(ctx, CreateShader(kPuzzle1, N), TensorList{input, output},
5256
/*nthreads*/ {N, 1, 1});
5357
showResult<N>(ctx, op, output);
5458
}
@@ -75,8 +79,8 @@ void puzzle2(Context &ctx) {
7579
Tensor a = CreateTensor(ctx, {N}, kf32, makeData<N>().data());
7680
Tensor b = CreateTensor(ctx, {N}, kf32, makeData<N>().data());
7781
Tensor output = CreateTensor(ctx, {N}, kf32);
78-
Kernel op = CreateKernel(ctx, CreateShader(kPuzzle2, 256), Tensors{a, b},
79-
output, {N, 1, 1});
82+
Kernel op = CreateKernel(ctx, CreateShader(kPuzzle2, 256), TensorList{a, b, output},
83+
{N, 1, 1});
8084
showResult<N>(ctx, op, output);
8185
}
8286

@@ -101,7 +105,7 @@ void puzzle3(Context &ctx) {
101105
Tensor input = CreateTensor(ctx, {N}, kf32, makeData<N>().data());
102106
Tensor output = CreateTensor(ctx, {N}, kf32);
103107
Kernel op =
104-
CreateKernel(ctx, CreateShader(kPuzzle3, 4), input, output, {N, 1, 1});
108+
CreateKernel(ctx, CreateShader(kPuzzle3, 4), TensorList{input, output}, {N, 1, 1});
105109
showResult<N>(ctx, op, output);
106110
}
107111

@@ -135,7 +139,7 @@ void puzzle4(Context &ctx) {
135139
};
136140
Kernel op =
137141
CreateKernel(ctx, CreateShader(kPuzzle4, /*workgroup size*/ {N, N, 1}),
138-
input, output, {N, N, 1}, Params{N});
142+
TensorList{input, output}, {N, N, 1}, Params{N});
139143
showResult<N, N, N>(ctx, op, output);
140144
}
141145

examples/hello_world/run.cpp

+7-3
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
#include "gpu.h"
22
#include <array>
33
#include <cstdio>
4+
#include <future>
45

56
using namespace gpu; // CreateContext, CreateTensor, CreateKernel,
67
// CreateShader, DispatchKernel, Wait, ToCPU
@@ -10,6 +11,7 @@ static const char *kGelu = R"(
1011
const GELU_SCALING_FACTOR: f32 = 0.7978845608028654; // sqrt(2.0 / PI)
1112
@group(0) @binding(0) var<storage, read_write> inp: array<{{precision}}>;
1213
@group(0) @binding(1) var<storage, read_write> out: array<{{precision}}>;
14+
@group(0) @binding(1) var<storage, read_write> dummy: array<{{precision}}>;
1315
@compute @workgroup_size({{workgroupSize}})
1416
fn main(
1517
@builtin(global_invocation_id) GlobalInvocationID: vec3<u32>) {
@@ -33,10 +35,12 @@ int main(int argc, char **argv) {
3335
}
3436
Tensor input = CreateTensor(ctx, Shape{N}, kf32, inputArr.data());
3537
Tensor output = CreateTensor(ctx, Shape{N}, kf32);
36-
Kernel op = CreateKernel(ctx, CreateShader(kGelu, 256, kf32), input, output,
38+
std::promise<void> promise;
39+
std::future<void> future = promise.get_future();
40+
Kernel op = CreateKernel(ctx, CreateShader(kGelu, 256, kf32), TensorList{input, output},
3741
/* nthreads */ {N, 1, 1});
38-
DispatchKernel(ctx, op);
39-
Wait(ctx, op.future);
42+
DispatchKernel(ctx, op, promise);
43+
Wait(ctx, future);
4044
ToCPU(ctx, output, outputArr.data(), sizeof(outputArr));
4145
for (int i = 0; i < 32; ++i) {
4246
printf("out[%d] : gelu(%.2f) = %.2f\n", i, inputArr[i], outputArr[i]);

examples/physics/run.cpp

+6-7
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,7 @@
22
#include <array>
33
#include <chrono>
44
#include <cstdio>
5+
#include <future>
56

67
using namespace gpu; // CreateContext, CreateTensor, CreateKernel,
78
// CreateShader, DispatchKernel, Wait, ToCPU
@@ -43,21 +44,19 @@ int main() {
4344
Tensor length = CreateTensor(ctx, Shape{N}, kf32, lengthArr.data());
4445
Tensor mass = CreateTensor(ctx, Shape{N}, kf32, massArr.data());
4546

46-
// TODO: no need to have output
47-
Tensor output = CreateTensor(ctx, Shape{N}, kf32);
48-
4947
Shape nThreads{N, 1, 1};
5048
Kernel update = CreateKernel(
5149
ctx, CreateShader(kShaderSimulation, 256, kf32),
5250
TensorList{pos1, vel1, pos2, vel2,
53-
length, mass}, output,
51+
length, mass},
5452
nThreads);
5553
while (true) {
5654
auto start = std::chrono::high_resolution_clock::now();
5755
ResetCommandBuffer(ctx.device, nThreads, update);
58-
59-
DispatchKernel(ctx, update);
60-
Wait(ctx, update.future);
56+
std::promise<void> promise;
57+
std::future<void> future = promise.get_future();
58+
DispatchKernel(ctx, update, promise);
59+
Wait(ctx, future);
6160
auto end = std::chrono::high_resolution_clock::now();
6261
std::chrono::duration<double> elapsed = end - start;
6362
std::this_thread::sleep_for(std::chrono::milliseconds(16) - elapsed);

examples/render/run.cpp

+10-9
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
#include <array>
22
#include <chrono>
33
#include <cstdio>
4+
#include <future>
45

56
#include "gpu.h"
67
#include "utils/array_utils.h"
@@ -119,16 +120,18 @@ int main(int argc, char **argv) {
119120

120121
ShaderCode shader = CreateShader(kSDF, Shape{16, 16, 1});
121122
Kernel renderKernel =
122-
CreateKernel(ctx, shader, {}, 0, devScreen, {NCOLS, NROWS, 1}, params);
123+
CreateKernel(ctx, shader, TensorList{devScreen}, {NCOLS, NROWS, 1}, params);
123124
while (true) {
124-
DispatchKernel(ctx, renderKernel);
125-
Wait(ctx, renderKernel.future);
125+
std::promise<void> promise;
126+
std::future<void> future = promise.get_future();
127+
DispatchKernel(ctx, renderKernel, promise);
128+
Wait(ctx, future);
126129
ToCPU(ctx, devScreen, screen.data(), sizeof(screen));
127-
// Update the time field, write pparams to GPU, and create a new command
128-
// buffer
129130
params.time = getCurrentTimeInMilliseconds() - zeroTime;
131+
132+
// write params to the last buffer
130133
wgpuQueueWriteBuffer(ctx.queue,
131-
renderKernel.buffers[renderKernel.numBuffers - 1], 0,
134+
renderKernel.buffers[renderKernel.numBindings - 1], 0,
132135
static_cast<void *>(&params), sizeof(params));
133136
ResetCommandBuffer(ctx.device, /*nthreads*/ {NCOLS, NROWS, 1},
134137
renderKernel);
@@ -137,15 +140,14 @@ int main(int argc, char **argv) {
137140
"\\|()1{}[]?-_+~<>i!lI;:,\"^`'. ";
138141
// static const char intensity[] = "@%#8$X71x*+=-:^~'.` ";
139142

140-
// normalize values
143+
// Intensity = depth map, focus on depth of the objects
141144
float min = 0.0;
142145
float max = params.sphereRadius * 3;
143146

144147
for (size_t i = 0; i < screen.size(); ++i) {
145148
screen[i] = (screen[i] - min) / (max - min);
146149
}
147150

148-
// index into intensity array
149151
std::array<char, screen.size()> raster;
150152
for (size_t i = 0; i < screen.size(); ++i) {
151153
size_t index =
@@ -155,7 +157,6 @@ int main(int argc, char **argv) {
155157
raster[i] = intensity[index];
156158
}
157159

158-
// Draw the raster
159160
char buffer[(NROWS + 2) * (NCOLS + 2)];
160161
char *offset = buffer;
161162
sprintf(offset, "+");

0 commit comments

Comments
 (0)