Skip to content

Commit f22c664

Browse files
committed
move tutorial content from an app to a markdown file
1 parent cb77adc commit f22c664

File tree

4 files changed

+62
-201
lines changed

4 files changed

+62
-201
lines changed

CMakeLists.txt

+7-13
Original file line numberDiff line numberDiff line change
@@ -68,19 +68,6 @@ else()
6868
exit()
6969
endif()
7070

71-
# Hello world demo
72-
73-
set(SRC_DEMO run.cpp gpu.h nn/shaders.h utils/array_utils.h utils/logging.h)
74-
add_executable(run_demo ${SRC_DEMO})
75-
target_link_libraries(run_demo PRIVATE ${LIBDL} ${CMAKE_DL_LIBS} webgpu)
76-
77-
# Test of basic kernels
78-
79-
set(SRC_TESTS utils/test_kernels.cpp gpu.h nn/shaders.h utils/array_utils.h utils/logging.h)
80-
add_executable(run_tests ${SRC_TESTS})
81-
target_link_libraries(run_tests PRIVATE ${LIBDL} ${CMAKE_DL_LIBS} webgpu)
82-
target_include_directories(run_tests PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})
83-
8471

8572
# Build the library target (libgpu)
8673

@@ -90,3 +77,10 @@ add_library(gpu SHARED ${SRC_LIB})
9077
set_target_properties(gpu PROPERTIES LINKER_LANGUAGE CXX)
9178

9279
# For additional targets see directories under `examples/`, which have their own CMakeLists.txt
80+
81+
# Test of basic kernels
82+
83+
set(SRC_TESTS utils/test_kernels.cpp gpu.h nn/shaders.h utils/array_utils.h utils/logging.h)
84+
add_executable(run_tests ${SRC_TESTS})
85+
target_link_libraries(run_tests PRIVATE ${LIBDL} ${CMAKE_DL_LIBS} webgpu)
86+
target_include_directories(run_tests PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})

Makefile

+1-5
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,11 @@
11
NUM_JOBS=$(shell nproc)
22
CXX=clang++
3-
TARGET_DEMO=run_demo
43
TARGET_TESTS=run_tests
54
TARGET_LIB=gpu
65
TARGET_ALL=$(TARGET_DEMO) $(TARGET_TESTS) $(TARGET_LIB)
76
USE_LOCAL=-DUSE_LOCAL_LIBS=ON
87

9-
.PHONY: demo tests libgpu debug build check-entr watch-demo watch-tests clean
8+
.PHONY: tests libgpu debug build check-entr watch-tests clean
109

1110
# Add --trace to see the cmake commands
1211
FLAGS = -DCMAKE_VERBOSE_MAKEFILE:BOOL=ON -DCMAKE_CXX_COMPILER=$(CXX) -DABSL_INTERNAL_AT_LEAST_CXX20=OFF
@@ -17,9 +16,6 @@ RELEASE_FLAGS = $(FLAGS) -DFASTBUILD:BOOL=OFF
1716
LOCAL_FLAGS = -DUSE_LOCAL_LIBS=ON
1817
CMAKE_CMD = mkdir -p build && cd build && cmake ..
1918

20-
demo: check-dependencies
21-
$(CMAKE_CMD) $(FASTBUILD_FLAGS) && make -j$(NUM_JOBS) $(TARGET_DEMO) && ./$(TARGET_DEMO)
22-
2319
tests: check-dependencies
2420
$(CMAKE_CMD) $(FASTBUILD_FLAGS) && make -j$(NUM_JOBS) $(TARGET_TESTS) && ./$(TARGET_TESTS)
2521

examples/physics/run.cpp

+22-10
Original file line numberDiff line numberDiff line change
@@ -11,31 +11,46 @@ using namespace gpu; // CreateContext, CreateTensor, CreateKernel,
1111
const char *kShaderSimulation = R"(
1212
const G: f32 = 9.81;
1313
const dt: f32 = 0.01;
14+
15+
// size = 2 * # of pendulums
1416
@group(0) @binding(0) var<storage, read_write> pos1: array<{{precision}}>;
1517
@group(0) @binding(1) var<storage, read_write> vel1: array<{{precision}}>;
1618
@group(0) @binding(2) var<storage, read_write> pos2: array<{{precision}}>;
1719
@group(0) @binding(3) var<storage, read_write> vel2: array<{{precision}}>;
20+
21+
// size = # of pendulums
1822
@group(0) @binding(4) var<storage, read_write> length: array<{{precision}}>;
1923
@group(0) @binding(5) var<storage, read_write> mass: array<{{precision}}>;
20-
@group(0) @binding(6) var<storage, read_write> output: array<{{precision}}>;
2124
2225
@compute @workgroup_size({{workgroupSize}})
2326
fn main(
2427
@builtin(global_invocation_id) GlobalInvocationID: vec3<u32>) {
28+
let ic: u32 = GlobalInvocationID.x * 2; // x and y values are adjacent
2529
let i: u32 = GlobalInvocationID.x;
2630
if (i < arrayLength(&pos1)) {
27-
// TODO
31+
// Double pendulum x and y values are adjacent in the arrays
32+
let x1: f32 = pos1[ic];
33+
let y1: f32 = pos1[ic + 1];
34+
let vx1: f32 = vel1[ic];
35+
let vy1: f32 = vel1[ic + 1];
36+
let x2: f32 = pos2[ic];
37+
let y2: f32 = pos2[ic + 1];
38+
let vx2: f32 = vel2[ic];
39+
let vy2: f32 = vel2[ic + 1];
40+
let l: f32 = length[i];
41+
let m: f32 = mass[i];
42+
2843
2944
}
3045
}
3146
)";
3247

3348
int main() {
34-
printf("\nHello, gpu.cpp\n\n");
3549
Context ctx = CreateContext();
3650
static constexpr size_t N = 1000;
3751

38-
std::array<float, N> x1Arr, x2Arr, y1Arr, y2Arr, vx1Arr, vy1Arr, vx2Arr, vy2Arr, lengthArr, massArr;
52+
std::array<float, N> x1Arr, x2Arr, y1Arr, y2Arr, vx1Arr, vy1Arr, vx2Arr,
53+
vy2Arr, lengthArr, massArr;
3954

4055
Tensor pos1 = CreateTensor(ctx, Shape{N}, kf32, x1Arr.data());
4156
Tensor pos2 = CreateTensor(ctx, Shape{N}, kf32, x2Arr.data());
@@ -45,11 +60,9 @@ int main() {
4560
Tensor mass = CreateTensor(ctx, Shape{N}, kf32, massArr.data());
4661

4762
Shape nThreads{N, 1, 1};
48-
Kernel update = CreateKernel(
49-
ctx, CreateShader(kShaderSimulation, 256, kf32),
50-
TensorList{pos1, vel1, pos2, vel2,
51-
length, mass},
52-
nThreads);
63+
Kernel update =
64+
CreateKernel(ctx, CreateShader(kShaderSimulation, 256, kf32),
65+
TensorList{pos1, vel1, pos2, vel2, length, mass}, nThreads);
5366
while (true) {
5467
auto start = std::chrono::high_resolution_clock::now();
5568
ResetCommandBuffer(ctx.device, nThreads, update);
@@ -61,5 +74,4 @@ int main() {
6174
std::chrono::duration<double> elapsed = end - start;
6275
std::this_thread::sleep_for(std::chrono::milliseconds(16) - elapsed);
6376
}
64-
6577
}

0 commit comments

Comments
 (0)