@@ -33,10 +33,10 @@ void TestResidual(GPUContext &ctx) {
3333 GPUTensor output = CreateTensor (ctx, {N}, kf32, outputArr.data ());
3434 ShaderCode shaderCode = ResidualShader (workgroupSize, kf32);
3535 log (kDefLog , kInfo , " Shader Code :\n %s" , shaderCode.code .c_str ());
36- Kernel op = PrepareKernel <NoParam, 2 >(
36+ Kernel op = CreateKernel <NoParam, 2 >(
3737 ctx, ResidualShader (workgroupSize, kf32),
3838 std::array<GPUTensor, 2 >{input1, input2}, output, {});
39- LaunchKernel (ctx, op);
39+ DispatchKernel (ctx, op);
4040 Wait (ctx, op.future );
4141 ToCPU (ctx, output, outputArr.data (), sizeof (outputArr));
4242 log (kDefLog , kInfo , " %s" , show<float , N, 1 >(outputArr, " Output" ).c_str ());
@@ -56,9 +56,9 @@ void TestHadamard(GPUContext &ctx) {
5656 GPUTensor output = CreateTensor (ctx, {N}, kf32, outputArr.data ());
5757 ShaderCode shaderCode = HadamardShader (workgroupSize, kf32);
5858 log (kDefLog , kInfo , " Shader Code :\n %s" , shaderCode.code .c_str ());
59- Kernel op = PrepareKernel (ctx, HadamardShader (workgroupSize, kf32),
59+ Kernel op = CreateKernel (ctx, HadamardShader (workgroupSize, kf32),
6060 std::array{input1, input2}, output, {});
61- LaunchKernel (ctx, op);
61+ DispatchKernel (ctx, op);
6262 Wait (ctx, op.future );
6363 log (kDefLog , kInfo , " %s" , show<float , N, 1 >(outputArr, " Output" ).c_str ());
6464}
@@ -77,9 +77,9 @@ void TestMatmul(GPUContext &ctx) {
7777 GPUTensor input2 = CreateTensor (ctx, {K, N}, kf32, input2Arr.data ());
7878 GPUTensor output = CreateTensor (ctx, {M, N}, kf32, outputArr.data ());
7979 Kernel op =
80- PrepareKernel (ctx, MatmulShader (256 , kShaderMatMul1 , kf32, M, K, N),
80+ CreateKernel (ctx, MatmulShader (256 , kShaderMatMul1 , kf32, M, K, N),
8181 std::array{input1, input2}, output);
82- LaunchKernel (ctx, op);
82+ DispatchKernel (ctx, op);
8383 Wait (ctx, op.future );
8484 ToCPU (ctx, output, outputArr.data (), sizeof (outputArr));
8585 log (kDefLog , kInfo , " %s" , show<float , M, K>(input1Arr, " A" ).c_str ());
@@ -132,9 +132,9 @@ void TestGelu(GPUContext &ctx) {
132132 GPUTensor geluOut = CreateTensor (ctx, {N}, kf32, outputArr.data ());
133133 log (kDefLog , kInfo , " Creating GELU Shader" );
134134 Kernel op =
135- PrepareKernel (ctx, GeluShader (256 , kf32), std::array{geluIn}, geluOut);
136- log (kDefLog , kInfo , " Launching GELU Shader" );
137- LaunchKernel (ctx, op);
135+ CreateKernel (ctx, GeluShader (256 , kf32), std::array{geluIn}, geluOut);
136+ log (kDefLog , kInfo , " Dispatching GELU Shader" );
137+ DispatchKernel (ctx, op);
138138 Wait (ctx, op.future );
139139 ToCPU (ctx, geluOut, outputArr.data (), sizeof (outputArr));
140140 log (kDefLog , kInfo , " %s" , show<float , N, 1 >(inputArr, " GELU Input" ).c_str ());
@@ -169,10 +169,10 @@ void TestLayerNorm(GPUContext &ctx) {
169169 GPUTensor weight = CreateTensor (ctx, {C}, kf32, weightArr.data ());
170170 GPUTensor bias = CreateTensor (ctx, {C}, kf32, biasArr.data ());
171171 GPUTensor output = CreateTensor (ctx, {N, C}, kf32, outputArr.data ());
172- Kernel op = PrepareKernel <LNParam, 3 >(ctx, LayerNormShader (256 , kf32),
172+ Kernel op = CreateKernel <LNParam, 3 >(ctx, LayerNormShader (256 , kf32),
173173 std::array{input, weight, bias}, output,
174174 params);
175- LaunchKernel (ctx, op);
175+ DispatchKernel (ctx, op);
176176 Wait (ctx, op.future );
177177 ToCPU (ctx, output, outputArr.data (), sizeof (outputArr));
178178 log (kDefLog , kInfo , " %s" ,
@@ -212,9 +212,9 @@ void TestSoftmax(GPUContext &ctx) {
212212 randint (inputArr, gen, 0 , 3 );
213213 GPUTensor input = CreateTensor (ctx, {B, T, C}, kf32, inputArr.data ());
214214 GPUTensor output = CreateTensor (ctx, {B, T, C}, kf32, outputArr.data ());
215- Kernel op = PrepareKernel <SoftmaxParam, 1 >(ctx, SoftmaxShader (256 , kf32),
215+ Kernel op = CreateKernel <SoftmaxParam, 1 >(ctx, SoftmaxShader (256 , kf32),
216216 {input}, output, {B * T, C});
217- LaunchKernel (ctx, op);
217+ DispatchKernel (ctx, op);
218218 Wait (ctx, op.future );
219219 ToCPU (ctx, output, outputArr.data (), sizeof (outputArr));
220220 log (kDefLog , kInfo , " %s" ,
@@ -274,8 +274,8 @@ void TestMultiKernel1(GPUContext &ctx) {
274274 .params = &param,
275275 .paramSizes = &size,
276276 };
277- MultiKernel pipeline = PrepareMultiKernel (ctx, desc);
278- LaunchMultiKernel (ctx, pipeline);
277+ MultiKernel pipeline = CreateMultiKernel (ctx, desc);
278+ DispatchMultiKernel (ctx, pipeline);
279279 Wait (ctx, pipeline.future );
280280 ToCPU (ctx, output, outputArr.data (), sizeof (outputArr));
281281 log (kDefLog , kInfo , " %s" ,
@@ -328,8 +328,8 @@ void TestMultiKernel2(GPUContext &ctx) {
328328 .params = params.data (),
329329 .paramSizes = paramSizes.data (),
330330 };
331- MultiKernel pipeline = PrepareMultiKernel (ctx, desc);
332- LaunchMultiKernel (ctx, pipeline);
331+ MultiKernel pipeline = CreateMultiKernel (ctx, desc);
332+ DispatchMultiKernel (ctx, pipeline);
333333 Wait (ctx, pipeline.future );
334334
335335 log (kDefLog , kInfo , " %s" ,
0 commit comments