Skip to content

Commit 2223ea4

Browse files
committed
clean up API naming convention consistency - Prepare -> Create, Launch -> Dispatch
1 parent f0257ff commit 2223ea4

File tree

6 files changed

+39
-39
lines changed

6 files changed

+39
-39
lines changed

README.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -64,8 +64,8 @@ int main(int argc, char **argv) {
6464
GPUTensor input = CreateTensor(ctx, {N}, kf32, inputArr.data());
6565
GPUTensor output = CreateTensor(ctx, {N}, kf32, outputArr.data());
6666
Kernel op =
67-
PrepareKernel(ctx, kGELU, std::array{input}, output);
68-
LaunchKernel(ctx, op);
67+
CreateKernel(ctx, kGELU, std::array{input}, output);
68+
DispatchKernel(ctx, op);
6969
Wait(ctx, op.future);
7070
ToCPU(ctx, output, outputArr.data(), sizeof(outputArr));
7171
for (int i = 0; i < 10; ++i) {
@@ -77,7 +77,7 @@ int main(int argc, char **argv) {
7777
```
7878

7979
For those curious about what happens under the hood with the raw WebGPU API,
80-
the equivalent functionality is implemented using the raw WebGPU C API in
80+
the equivalent functionality is implemented using the WebGPU C API in
8181
`examples/webgpu_intro/run.cpp`.
8282

8383
## Quick Start

examples/hello_world/run.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,8 @@ int main(int argc, char **argv) {
1818
GPUTensor output = CreateTensor(ctx, {N}, kf32, outputArr.data());
1919

2020
Kernel op =
21-
PrepareKernel(ctx, GeluShader(256, kf32), std::array{input}, output);
22-
LaunchKernel(ctx, op);
21+
CreateKernel(ctx, GeluShader(256, kf32), std::array{input}, output);
22+
DispatchKernel(ctx, op);
2323
Wait(ctx, op.future);
2424
ToCPU(ctx, output, outputArr.data(), sizeof(outputArr));
2525
for (int i = 0; i < 10; ++i) {

examples/raymarch/run.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -78,8 +78,8 @@ int main(int argc, char **argv) {
7878

7979
GPUContext ctx = CreateGPUContext();
8080
GPUTensor devScreen = CreateTensor(ctx, {NROWS, NCOLS}, kf32, screen.data());
81-
Kernel render = PrepareKernel(ctx, ShaderCode{kSDF, 64}, {}, 0, devScreen, params);
82-
LaunchKernel(ctx, render);
81+
Kernel render = CreateKernel(ctx, ShaderCode{kSDF, 64}, {}, 0, devScreen, params);
82+
DispatchKernel(ctx, render);
8383
Wait(ctx, render.future);
8484
ToCPU(ctx, devScreen, screen.data(), sizeof(screen));
8585

gpu.h

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -440,7 +440,7 @@ void ToGPU(GPUContext &ctx, const float *data, GPUTensor &tensor) {
440440
tensor.data.size);
441441
}
442442

443-
Kernel PrepareKernel(GPUContext &ctx, const ShaderCode &shader,
443+
Kernel CreateKernel(GPUContext &ctx, const ShaderCode &shader,
444444
const GPUTensor *inputs, size_t numInputs,
445445
const GPUTensor &output, const void *params = nullptr,
446446
size_t paramsSize = 0) {
@@ -628,39 +628,39 @@ Kernel PrepareKernel(GPUContext &ctx, const ShaderCode &shader,
628628
check(op.commandBuffer, "Create command buffer", __FILE__, __LINE__);
629629
}
630630

631-
log(kDefLog, kInfo, "Exiting PrepareKernel");
631+
log(kDefLog, kInfo, "Exiting CreateKernel");
632632
return op;
633633
}
634634

635635
template <typename ParamsType = NoParam>
636-
Kernel PrepareKernel(GPUContext &ctx, const ShaderCode &shader,
636+
Kernel CreateKernel(GPUContext &ctx, const ShaderCode &shader,
637637
const GPUTensor *inputs, size_t numInputs,
638638
const GPUTensor &output,
639639
const ParamsType &params = ParamsType{}) {
640640
if constexpr (!IsNoParam<ParamsType>) {
641641
log(kDefLog, kInfo, "Using params of size %d bytes", sizeof(ParamsType));
642-
return PrepareKernel(ctx, shader, inputs, numInputs, output,
642+
return CreateKernel(ctx, shader, inputs, numInputs, output,
643643
reinterpret_cast<const void *>(&params),
644644
sizeof(ParamsType));
645645
} else {
646646
log(kDefLog, kInfo, "No params");
647-
return PrepareKernel(ctx, shader, inputs, numInputs, output, nullptr, 0);
647+
return CreateKernel(ctx, shader, inputs, numInputs, output, nullptr, 0);
648648
}
649649
}
650650

651651
/*
652-
* PrepareKernel with array of inputs (convienence function)
652+
* CreateKernel with array of inputs (convenience function)
653653
*/
654654
template <typename ParamsType = NoParam, size_t numInputs>
655-
Kernel PrepareKernel(GPUContext &ctx, const ShaderCode &shader,
655+
Kernel CreateKernel(GPUContext &ctx, const ShaderCode &shader,
656656
const std::array<GPUTensor, numInputs> &inputs,
657657
const GPUTensor &output,
658658
const ParamsType &params = ParamsType{}) {
659-
return PrepareKernel<ParamsType>(ctx, shader, inputs.data(), numInputs,
659+
return CreateKernel<ParamsType>(ctx, shader, inputs.data(), numInputs,
660660
output, params);
661661
}
662662

663-
MultiKernel PrepareMultiKernel(GPUContext &ctx, const MultiKernelDesc &desc) {
663+
MultiKernel CreateMultiKernel(GPUContext &ctx, const MultiKernelDesc &desc) {
664664
WGPUDevice device = ctx.device;
665665
WGPUQueue queue = ctx.queue;
666666
MultiKernel pipeline;
@@ -849,7 +849,7 @@ MultiKernel PrepareMultiKernel(GPUContext &ctx, const MultiKernelDesc &desc) {
849849
return pipeline;
850850
}
851851

852-
void LaunchKernel(GPUContext &ctx, Kernel &op) {
852+
void DispatchKernel(GPUContext &ctx, Kernel &op) {
853853
// Submit the command buffer
854854
wgpuQueueSubmit(ctx.queue, 1, &op.commandBuffer);
855855

@@ -870,7 +870,7 @@ void LaunchKernel(GPUContext &ctx, Kernel &op) {
870870
&op.callbackData);
871871
}
872872

873-
void LaunchMultiKernel(GPUContext &ctx, MultiKernel &pipeline) {
873+
void DispatchMultiKernel(GPUContext &ctx, MultiKernel &pipeline) {
874874
wgpuQueueSubmit(ctx.queue, 1, &pipeline.commandBuffer);
875875

876876
pipeline.callbackData = CallbackDataDyn{

run.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -76,9 +76,9 @@ The core verbs (functions) of interest are:
7676
7777
- *Requesting GPU Resources* - CreateGPUContext(), CreateArray() and
7878
CreateTensor()
79-
- *Ahead-of-Time Preparation of a Computation* - PrepareKernel() which both binds
79+
- *Ahead-of-Time Preparation of a Computation* - CreateKernel() which both binds
8080
resources and compiles the kernel
81-
- *Asynchronous Execution of Computation* - LaunchKernel(), Wait()
81+
- *Asynchronous Execution of Computation* - DispatchKernel(), Wait()
8282
- *Data Movement* - ToCPU(), ToGPU(), also CreateArray and CreateTensor have
8383
convenience overloads that take CPU data directly as part of instantiation.
8484
@@ -186,15 +186,15 @@ workgroup size is the number of threads in a workgroup.
186186
)");
187187

188188
section(R"(
189-
Preparing a kernel
189+
Creating a kernel
190190
------------------
191191
192192
TODO(avh)
193193
)");
194194

195195

196196
section(R"(
197-
Launching a kernel
197+
Dispatching a kernel
198198
------------------
199199
200200
TODO(avh)

utils/test_kernels.cpp

Lines changed: 17 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -33,10 +33,10 @@ void TestResidual(GPUContext &ctx) {
3333
GPUTensor output = CreateTensor(ctx, {N}, kf32, outputArr.data());
3434
ShaderCode shaderCode = ResidualShader(workgroupSize, kf32);
3535
log(kDefLog, kInfo, "Shader Code :\n%s", shaderCode.code.c_str());
36-
Kernel op = PrepareKernel<NoParam, 2>(
36+
Kernel op = CreateKernel<NoParam, 2>(
3737
ctx, ResidualShader(workgroupSize, kf32),
3838
std::array<GPUTensor, 2>{input1, input2}, output, {});
39-
LaunchKernel(ctx, op);
39+
DispatchKernel(ctx, op);
4040
Wait(ctx, op.future);
4141
ToCPU(ctx, output, outputArr.data(), sizeof(outputArr));
4242
log(kDefLog, kInfo, "%s", show<float, N, 1>(outputArr, "Output").c_str());
@@ -56,9 +56,9 @@ void TestHadamard(GPUContext &ctx) {
5656
GPUTensor output = CreateTensor(ctx, {N}, kf32, outputArr.data());
5757
ShaderCode shaderCode = HadamardShader(workgroupSize, kf32);
5858
log(kDefLog, kInfo, "Shader Code :\n%s", shaderCode.code.c_str());
59-
Kernel op = PrepareKernel(ctx, HadamardShader(workgroupSize, kf32),
59+
Kernel op = CreateKernel(ctx, HadamardShader(workgroupSize, kf32),
6060
std::array{input1, input2}, output, {});
61-
LaunchKernel(ctx, op);
61+
DispatchKernel(ctx, op);
6262
Wait(ctx, op.future);
6363
log(kDefLog, kInfo, "%s", show<float, N, 1>(outputArr, "Output").c_str());
6464
}
@@ -77,9 +77,9 @@ void TestMatmul(GPUContext &ctx) {
7777
GPUTensor input2 = CreateTensor(ctx, {K, N}, kf32, input2Arr.data());
7878
GPUTensor output = CreateTensor(ctx, {M, N}, kf32, outputArr.data());
7979
Kernel op =
80-
PrepareKernel(ctx, MatmulShader(256, kShaderMatMul1, kf32, M, K, N),
80+
CreateKernel(ctx, MatmulShader(256, kShaderMatMul1, kf32, M, K, N),
8181
std::array{input1, input2}, output);
82-
LaunchKernel(ctx, op);
82+
DispatchKernel(ctx, op);
8383
Wait(ctx, op.future);
8484
ToCPU(ctx, output, outputArr.data(), sizeof(outputArr));
8585
log(kDefLog, kInfo, "%s", show<float, M, K>(input1Arr, "A").c_str());
@@ -132,9 +132,9 @@ void TestGelu(GPUContext &ctx) {
132132
GPUTensor geluOut = CreateTensor(ctx, {N}, kf32, outputArr.data());
133133
log(kDefLog, kInfo, "Creating GELU Shader");
134134
Kernel op =
135-
PrepareKernel(ctx, GeluShader(256, kf32), std::array{geluIn}, geluOut);
136-
log(kDefLog, kInfo, "Launching GELU Shader");
137-
LaunchKernel(ctx, op);
135+
CreateKernel(ctx, GeluShader(256, kf32), std::array{geluIn}, geluOut);
136+
log(kDefLog, kInfo, "Dispatching GELU Shader");
137+
DispatchKernel(ctx, op);
138138
Wait(ctx, op.future);
139139
ToCPU(ctx, geluOut, outputArr.data(), sizeof(outputArr));
140140
log(kDefLog, kInfo, "%s", show<float, N, 1>(inputArr, "GELU Input").c_str());
@@ -169,10 +169,10 @@ void TestLayerNorm(GPUContext &ctx) {
169169
GPUTensor weight = CreateTensor(ctx, {C}, kf32, weightArr.data());
170170
GPUTensor bias = CreateTensor(ctx, {C}, kf32, biasArr.data());
171171
GPUTensor output = CreateTensor(ctx, {N, C}, kf32, outputArr.data());
172-
Kernel op = PrepareKernel<LNParam, 3>(ctx, LayerNormShader(256, kf32),
172+
Kernel op = CreateKernel<LNParam, 3>(ctx, LayerNormShader(256, kf32),
173173
std::array{input, weight, bias}, output,
174174
params);
175-
LaunchKernel(ctx, op);
175+
DispatchKernel(ctx, op);
176176
Wait(ctx, op.future);
177177
ToCPU(ctx, output, outputArr.data(), sizeof(outputArr));
178178
log(kDefLog, kInfo, "%s",
@@ -212,9 +212,9 @@ void TestSoftmax(GPUContext &ctx) {
212212
randint(inputArr, gen, 0, 3);
213213
GPUTensor input = CreateTensor(ctx, {B, T, C}, kf32, inputArr.data());
214214
GPUTensor output = CreateTensor(ctx, {B, T, C}, kf32, outputArr.data());
215-
Kernel op = PrepareKernel<SoftmaxParam, 1>(ctx, SoftmaxShader(256, kf32),
215+
Kernel op = CreateKernel<SoftmaxParam, 1>(ctx, SoftmaxShader(256, kf32),
216216
{input}, output, {B * T, C});
217-
LaunchKernel(ctx, op);
217+
DispatchKernel(ctx, op);
218218
Wait(ctx, op.future);
219219
ToCPU(ctx, output, outputArr.data(), sizeof(outputArr));
220220
log(kDefLog, kInfo, "%s",
@@ -274,8 +274,8 @@ void TestMultiKernel1(GPUContext &ctx) {
274274
.params = &param,
275275
.paramSizes = &size,
276276
};
277-
MultiKernel pipeline = PrepareMultiKernel(ctx, desc);
278-
LaunchMultiKernel(ctx, pipeline);
277+
MultiKernel pipeline = CreateMultiKernel(ctx, desc);
278+
DispatchMultiKernel(ctx, pipeline);
279279
Wait(ctx, pipeline.future);
280280
ToCPU(ctx, output, outputArr.data(), sizeof(outputArr));
281281
log(kDefLog, kInfo, "%s",
@@ -328,8 +328,8 @@ void TestMultiKernel2(GPUContext &ctx) {
328328
.params = params.data(),
329329
.paramSizes = paramSizes.data(),
330330
};
331-
MultiKernel pipeline = PrepareMultiKernel(ctx, desc);
332-
LaunchMultiKernel(ctx, pipeline);
331+
MultiKernel pipeline = CreateMultiKernel(ctx, desc);
332+
DispatchMultiKernel(ctx, pipeline);
333333
Wait(ctx, pipeline.future);
334334

335335
log(kDefLog, kInfo, "%s",

0 commit comments

Comments (0)