Move tutorial content from the run_demo app into a markdown file (tutorial.md)

austinvhuang · austinvhuang · commit f22c664d593f · 2024-06-19T15:34:00.000-04:00
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -68,19 +68,6 @@ else()
   exit()
 endif()
 
-# Hello world demo
-
-set(SRC_DEMO run.cpp gpu.h nn/shaders.h utils/array_utils.h utils/logging.h)
-add_executable(run_demo ${SRC_DEMO})
-target_link_libraries(run_demo PRIVATE ${LIBDL} ${CMAKE_DL_LIBS} webgpu)
-
-# Test of basic kernels
-
-set(SRC_TESTS utils/test_kernels.cpp gpu.h nn/shaders.h utils/array_utils.h utils/logging.h)
-add_executable(run_tests ${SRC_TESTS})
-target_link_libraries(run_tests PRIVATE ${LIBDL} ${CMAKE_DL_LIBS} webgpu)
-target_include_directories(run_tests PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})
-
 
 # Build the library target (libgpu)
 
@@ -90,3 +77,10 @@ add_library(gpu SHARED ${SRC_LIB})
 set_target_properties(gpu PROPERTIES LINKER_LANGUAGE CXX)
 
 # For additional targets see directories under `examples/`, which have their own CMakeLists.txt
+
+# Test of basic kernels
+
+set(SRC_TESTS utils/test_kernels.cpp gpu.h nn/shaders.h utils/array_utils.h utils/logging.h)
+add_executable(run_tests ${SRC_TESTS})
+target_link_libraries(run_tests PRIVATE ${LIBDL} ${CMAKE_DL_LIBS} webgpu)
+target_include_directories(run_tests PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})
diff --git a/Makefile b/Makefile
@@ -1,12 +1,11 @@
 NUM_JOBS=$(shell nproc)
 CXX=clang++
-TARGET_DEMO=run_demo
 TARGET_TESTS=run_tests
 TARGET_LIB=gpu
 TARGET_ALL=$(TARGET_DEMO) $(TARGET_TESTS) $(TARGET_LIB)
 USE_LOCAL=-DUSE_LOCAL_LIBS=ON
 
-.PHONY: demo tests libgpu debug build check-entr watch-demo watch-tests clean
+.PHONY: tests libgpu debug build check-entr watch-tests clean
 
 # Add --trace to see the cmake commands
 FLAGS = -DCMAKE_VERBOSE_MAKEFILE:BOOL=ON -DCMAKE_CXX_COMPILER=$(CXX) -DABSL_INTERNAL_AT_LEAST_CXX20=OFF
@@ -17,9 +16,6 @@ RELEASE_FLAGS = $(FLAGS) -DFASTBUILD:BOOL=OFF
 LOCAL_FLAGS = -DUSE_LOCAL_LIBS=ON 
 CMAKE_CMD = mkdir -p build && cd build && cmake ..
 
-demo: check-dependencies
-	$(CMAKE_CMD) $(FASTBUILD_FLAGS) && make -j$(NUM_JOBS) $(TARGET_DEMO) && ./$(TARGET_DEMO)
-
 tests: check-dependencies
 	$(CMAKE_CMD) $(FASTBUILD_FLAGS) && make -j$(NUM_JOBS) $(TARGET_TESTS) && ./$(TARGET_TESTS)
 
diff --git a/examples/physics/run.cpp b/examples/physics/run.cpp
@@ -11,31 +11,46 @@ using namespace gpu; // CreateContext, CreateTensor, CreateKernel,
 const char *kShaderSimulation = R"(
 const G: f32 = 9.81;
 const dt: f32 = 0.01;
+
+// Each of the four arrays below holds 2 * (number of pendulums) values: adjacent x, y pairs
 @group(0) @binding(0) var<storage, read_write> pos1: array<{{precision}}>;
 @group(0) @binding(1) var<storage, read_write> vel1: array<{{precision}}>;
 @group(0) @binding(2) var<storage, read_write> pos2: array<{{precision}}>;
 @group(0) @binding(3) var<storage, read_write> vel2: array<{{precision}}>;
+
+// Each of the two arrays below holds one value per pendulum
 @group(0) @binding(4) var<storage, read_write> length: array<{{precision}}>;
 @group(0) @binding(5) var<storage, read_write> mass: array<{{precision}}>;
-@group(0) @binding(6) var<storage, read_write> output: array<{{precision}}>;
 
 @compute @workgroup_size({{workgroupSize}})
 fn main(
     @builtin(global_invocation_id) GlobalInvocationID: vec3<u32>) {
+    let ic: u32 = GlobalInvocationID.x * 2; // x and y values are adjacent
     let i: u32 = GlobalInvocationID.x;
     if (i < arrayLength(&pos1)) {
-    // TODO
+    // Each pendulum's x and y values are stored adjacently: x at index ic, y at index ic + 1
+    let x1: f32 = pos1[ic];
+    let y1: f32 = pos1[ic + 1];
+    let vx1: f32 = vel1[ic];
+    let vy1: f32 = vel1[ic + 1];
+    let x2: f32 = pos2[ic];
+    let y2: f32 = pos2[ic + 1];
+    let vx2: f32 = vel2[ic];
+    let vy2: f32 = vel2[ic + 1];
+    let l: f32 = length[i];
+    let m: f32 = mass[i];
+
 
     }
 }
 )";
 
 int main() {
-  printf("\nHello, gpu.cpp\n\n");
   Context ctx = CreateContext();
   static constexpr size_t N = 1000;
 
-  std::array<float, N> x1Arr, x2Arr, y1Arr, y2Arr, vx1Arr, vy1Arr, vx2Arr, vy2Arr, lengthArr, massArr;
+  std::array<float, N> x1Arr, x2Arr, y1Arr, y2Arr, vx1Arr, vy1Arr, vx2Arr,
+      vy2Arr, lengthArr, massArr;
 
   Tensor pos1 = CreateTensor(ctx, Shape{N}, kf32, x1Arr.data());
   Tensor pos2 = CreateTensor(ctx, Shape{N}, kf32, x2Arr.data());
@@ -45,11 +60,9 @@ int main() {
   Tensor mass = CreateTensor(ctx, Shape{N}, kf32, massArr.data());
 
   Shape nThreads{N, 1, 1};
-  Kernel update = CreateKernel(
-      ctx, CreateShader(kShaderSimulation, 256, kf32),
-      TensorList{pos1, vel1, pos2, vel2,
-       length, mass}, 
-      nThreads);
+  Kernel update =
+      CreateKernel(ctx, CreateShader(kShaderSimulation, 256, kf32),
+                   TensorList{pos1, vel1, pos2, vel2, length, mass}, nThreads);
   while (true) {
     auto start = std::chrono::high_resolution_clock::now();
     ResetCommandBuffer(ctx.device, nThreads, update);
@@ -61,5 +74,4 @@ int main() {
     std::chrono::duration<double> elapsed = end - start;
     std::this_thread::sleep_for(std::chrono::milliseconds(16) - elapsed);
   }
-
 }
diff --git a/tutorial.md b/tutorial.md