CreateCommandBuffer -> ResetCommandBuffer, test use in render example

austinvhuang · austinvhuang · commit acd07688ea2c · 2024-06-17T17:45:50.000-04:00
diff --git a/examples/render/run.cpp b/examples/render/run.cpp
@@ -117,29 +117,25 @@ int main(int argc, char **argv) {
   GPUTensor devScreen = CreateTensor(ctx, {NROWS, NCOLS}, kf32, screen.data());
   uint32_t zeroTime = getCurrentTimeInMilliseconds();
 
+  ShaderCode shader = CreateShader(kSDF, Shape{16, 16, 1});
+  Kernel renderKernel =
+      CreateKernel(ctx, shader, {}, 0, devScreen, {NCOLS, NROWS, 1}, params);
   while (true) {
-    params.time = getCurrentTimeInMilliseconds() - zeroTime;
-    ShaderCode shader = CreateShader(kSDF, Shape{16, 16, 1});
-
-    // TODO(avh): Clean this up - too easy to miscalculate # of workgroups in x
-    // and y directions since tensor dimensions (rows, cols) are reversed from
-    // screen dimensions (cols, rows)
-    Kernel render = CreateKernel(ctx, shader, {}, 0, devScreen,
-                                 static_cast<void *>(&params), sizeof(params),
-                                 {NCOLS, NROWS, 1});
-    DispatchKernel(ctx, render);
-    Wait(ctx, render.future);
+    DispatchKernel(ctx, renderKernel);
+    Wait(ctx, renderKernel.future);
     ToCPU(ctx, devScreen, screen.data(), sizeof(screen));
-
-    static const char intensity[] = "$@B%8&WM#*oahkbdpqwmZO0QLCJUYXzcvunxrjft/\\|()1{}[]?-_+~<>i!lI;:,\"^`'. ";
+    // Update the time field, write params to GPU, and create a new command buffer
+    params.time = getCurrentTimeInMilliseconds() - zeroTime;
+    wgpuQueueWriteBuffer(ctx.queue,
+                         renderKernel.buffers[renderKernel.numBuffers - 1], 0,
+                         static_cast<void *>(&params), sizeof(params));
+    ResetCommandBuffer(ctx.device, shader.workgroupSize, {NCOLS, NROWS, 1},
+                       renderKernel);
+
+    static const char intensity[] = "$@B%8&WM#*oahkbdpqwmZO0QLCJUYXzcvunxrjft/"
+                                    "\\|()1{}[]?-_+~<>i!lI;:,\"^`'. ";
     // static const char intensity[] = "@%#8$X71x*+=-:^~'.` ";
 
-    // clear the screen, move cursor to the top
-    printf("\033[2J\033[H");
-
-    // fprintf(stdout, "%s",
-    //        show<float, NROWS, NCOLS>(screen, "Raw values").c_str());
-
     // normalize values
     float min = 0.0;
     float max = params.sphereRadius * 3;
@@ -148,14 +144,6 @@ int main(int argc, char **argv) {
       screen[i] = (screen[i] - min) / (max - min);
     }
 
-    fprintf(stdout, "Workgroup size: %zu %zu %zu \n", shader.workgroupSize[0],
-            shader.workgroupSize[1], shader.workgroupSize[2]);
-    fprintf(stdout, "Number of Threads: %zu %zu %d \n", devScreen.shape[1],
-            devScreen.shape[0], 1);
-
-    // fprintf(stdout, "%s", show<float, NROWS, NCOLS>(screen,
-    // "Scaled").c_str());
-
     // index into intensity array
     std::array<char, screen.size()> raster;
     for (size_t i = 0; i < screen.size(); ++i) {
@@ -166,24 +154,33 @@ int main(int argc, char **argv) {
       raster[i] = intensity[index];
     }
 
-    // draw the tui screen
-    printf("+");
+    // Draw the raster
+    char buffer[(NROWS + 2) * (NCOLS + 3) + 1]; // NROWS + 2 lines of NCOLS + 3 chars (2 borders + '\n'), plus NUL
+    char *offset = buffer;
+    sprintf(offset, "+");
     for (size_t col = 0; col < NCOLS; ++col) {
-      printf("-");
+      sprintf(offset + col + 1, "-");
     }
-    printf("+\n");
+    sprintf(buffer + NCOLS + 1, "+\n");
+    offset += NCOLS + 3;
     for (size_t row = 0; row < NROWS; ++row) {
-      printf("|");
+      sprintf(offset, "|");
       for (size_t col = 0; col < NCOLS; ++col) {
-        printf("%c", raster[row * NCOLS + col]);
+        sprintf(offset + col + 1, "%c", raster[row * NCOLS + col]);
       }
-      printf("|\n");
+      sprintf(offset + NCOLS + 1, "|\n");
+      offset += NCOLS + 3;
     }
-    printf("+");
+    sprintf(offset, "+");
     for (size_t col = 0; col < NCOLS; ++col) {
-      printf("-");
+      sprintf(offset + col + 1, "-");
     }
-    printf("+\n");
+    sprintf(offset + NCOLS + 1, "+\n");
+    printf("\033[2J\033[H");
+    printf("Workgroup size: %zu %zu %zu \n", shader.workgroupSize[0],
+           shader.workgroupSize[1], shader.workgroupSize[2]);
+    printf("Number of Threads: %zu %zu %d \n", devScreen.shape[1],
+           devScreen.shape[0], 1);
+    printf("%s", buffer);
   }
-
 }
diff --git a/gpu.h b/gpu.h
@@ -153,8 +153,9 @@ struct Kernel {
   size_t outputSize;
   size_t numBuffers;
   size_t numInputs;
-  WGPUCommandBuffer commandBuffer;     // destroyed upon submission
+  WGPUBindGroup bindGroup;             // persists between submission
   WGPUComputePipeline computePipeline; // persists between submission
+  WGPUCommandBuffer commandBuffer;     // destroyed upon submission
   WGPUBuffer readbackBuffer;
   CallbackDataDyn callbackData;
   std::promise<void> promise;
@@ -532,36 +533,34 @@ void ToGPU(GPUContext &ctx, const float *data, GPUTensor &tensor) {
 }
 
 // Separate this out since WGPUCommandBuffer is destroyed upon submission
-WGPUCommandBuffer
-CreateCommandBuffer(WGPUDevice &device,
-                    const WGPUComputePipeline &computePipeline,
-                    const WGPUBindGroup &bindGroup, const ShaderCode &shader,
-                    const Shape &nThreads) {
-  WGPUCommandBuffer commandBuffer;
-  log(kDefLog, kInfo, "Create command buffer 0x%x", commandBuffer);
+void ResetCommandBuffer(WGPUDevice &device,
+                    const Shape &workgroupSize,
+                    const Shape &nThreads, Kernel &op) {
+  log(kDefLog, kInfo, "Create command buffer 0x%x", op.commandBuffer);
   {
     WGPUCommandEncoder commandEncoder =
         wgpuDeviceCreateCommandEncoder(device, nullptr);
     WGPUComputePassEncoder computePassEncoder =
         wgpuCommandEncoderBeginComputePass(commandEncoder, nullptr);
-    wgpuComputePassEncoderSetPipeline(computePassEncoder, computePipeline);
-    wgpuComputePassEncoderSetBindGroup(computePassEncoder, 0, bindGroup, 0,
+    wgpuComputePassEncoderSetPipeline(computePassEncoder, op.computePipeline);
+    wgpuComputePassEncoderSetBindGroup(computePassEncoder, 0, op.bindGroup, 0,
                                        nullptr);
     log(kDefLog, kInfo, "Dispatching workgroups for number of threads = %s",
         ToString(nThreads).c_str());
     wgpuComputePassEncoderDispatchWorkgroups(
         computePassEncoder,
-        /* # X workgroups */ (nThreads[0] + (shader.workgroupSize[0] - 1)) /
-            shader.workgroupSize[0],
-        /* # Y workgroups */ (nThreads[1] + (shader.workgroupSize[1] - 1)) /
-            shader.workgroupSize[1],
-        /* # Z workgroups */ (nThreads[2] + (shader.workgroupSize[2] - 1)) /
-            shader.workgroupSize[2]);
+        /* # X workgroups */ (nThreads[0] + (workgroupSize[0] - 1)) /
+            workgroupSize[0],
+        /* # Y workgroups */ (nThreads[1] + (workgroupSize[1] - 1)) /
+            workgroupSize[1],
+        /* # Z workgroups */ (nThreads[2] + (workgroupSize[2] - 1)) /
+            workgroupSize[2]);
     wgpuComputePassEncoderEnd(computePassEncoder);
-    commandBuffer = wgpuCommandEncoderFinish(commandEncoder, nullptr);
-    check(commandBuffer, "Create command buffer", __FILE__, __LINE__);
+    op.commandBuffer = wgpuCommandEncoderFinish(commandEncoder, nullptr);
+    check(op.commandBuffer, "Create command buffer", __FILE__, __LINE__);
   }
-  return commandBuffer;
+  op.promise = std::promise<void>();
+  op.future = op.promise.get_future();
 }
 
 Kernel CreateKernel(GPUContext &ctx, const ShaderCode &shader,
@@ -686,11 +685,7 @@ Kernel CreateKernel(GPUContext &ctx, const ShaderCode &shader,
       .entryCount = static_cast<uint32_t>(bindGroupEntries.size()),
       .entries = bindGroupEntries.data(),
   };
-  WGPUBindGroup bindGroup = wgpuDeviceCreateBindGroup(device, &bindGroupDesc);
-
-  log(kDefLog, kInfo, "Initializing promise and future");
-  op.promise = std::promise<void>();
-  op.future = op.promise.get_future();
+  op.bindGroup = wgpuDeviceCreateBindGroup(device, &bindGroupDesc);
 
   log(kDefLog, kInfo, "Create the readback buffer");
   {
@@ -727,8 +722,8 @@ Kernel CreateKernel(GPUContext &ctx, const ShaderCode &shader,
         wgpuDeviceCreateComputePipeline(device, &computePipelineDesc);
     check(op.computePipeline, "Create compute pipeline", __FILE__, __LINE__);
   }
-  op.commandBuffer = CreateCommandBuffer(device, op.computePipeline, bindGroup,
-                                         shader, nThreads);
+  ResetCommandBuffer(device, shader.workgroupSize, nThreads, op);
+                                
 
   log(kDefLog, kInfo, "Initializing callbackData");
   op.callbackData = {op.readbackBuffer, op.outputSize, nullptr, &op.promise};