Skip to content

Commit 63d6ea6

Browse files
committed
tweak simulation parameters and rendering
1 parent 003d1a6 commit 63d6ea6

File tree

4 files changed

+34
-26
lines changed

4 files changed

+34
-26
lines changed

examples/physics/CMakeLists.txt

+6
Original file line number | Diff line number | Diff line change
@@ -23,6 +23,12 @@ FetchContent_Declare(
2323
)
2424
FetchContent_MakeAvailable(gpu)
2525

26+
option(DEBUG "Option to enable debug flags" OFF)
27+
if(DEBUG)
28+
set(CMAKE_BUILD_TYPE Debug)
29+
set(CMAKE_CXX_FLAGS "-O0 -g")
30+
endif()
31+
2632
add_executable(physics run.cpp)
2733
target_link_libraries(physics gpu webgpu)
2834
target_include_directories(physics PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../ )

examples/physics/Makefile

+4
Original file line number | Diff line number | Diff line change
@@ -4,10 +4,14 @@ CXX=clang++
44
TARGET = physics
55
FLAGS = -DCMAKE_VERBOSE_MAKEFILE:BOOL=ON -DCMAKE_CXX_COMPILER="$(CXX)"
66
FASTBUILD_FLAGS = $(FLAGS) -DFASTBUILD:BOOL=ON
7+
DEBUG_FLAGS = $(FLAGS) -DDEBUG:BOOL=ON
78

89
run:
910
mkdir -p build && cd build && cmake .. $(FASTBUILD_FLAGS) && make -j$(NUM_JOBS) $(TARGET) && ./$(TARGET)
1011

12+
debug :
13+
mkdir -p build && cd build && cmake .. $(DEBUG_FLAGS) && make -j$(NUM_JOBS) $(TARGET) && ./$(TARGET)
14+
1115
watch:
1216
@command -v entr >/dev/null 2>&1 || { echo >&2 "Please install entr with 'brew install entr' or 'sudo apt-get install entr'"; exit 1; }
1317
mkdir -p build && cd build && cmake .. $(FASTBUILD_FLAGS) && ls ../* ../utils/* | entr -s "rm -f $(TARGET) && make -j$(NUM_JOBS) $(TARGET) && ./$(TARGET)"

examples/physics/run.cpp

+23-25
Original file line number | Diff line number | Diff line change
@@ -8,9 +8,9 @@ using namespace gpu; // CreateContext, CreateTensor, CreateKernel,
88
// CreateShader, DispatchKernel, Wait, ToCPU
99
// Tensor, TensorList, Kernel, Context, Shape, kf32
1010

11-
const char *kShaderSimulation = R"(
11+
const char *kShaderUpdateSim = R"(
1212
const G: f32 = 9.81;
13-
const dt: f32 = 0.04;
13+
const dt: f32 = 0.03;
1414
@group(0) @binding(0) var<storage, read_write> theta1: array<f32>;
1515
@group(0) @binding(1) var<storage, read_write> theta2: array<f32>;
1616
@group(0) @binding(2) var<storage, read_write> thetaVel1: array<f32>;
@@ -47,13 +47,13 @@ fn main(@builtin(global_invocation_id) global_id : vec3<u32>) {
4747
}
4848
)";
4949

50-
void render(float *pos, size_t n, float maxX, float maxY, size_t screenWidth,
51-
size_t screenHeight) {
52-
static const char reverse_intensity[] = " .`'^-+=*x17X$8#%@";
53-
const size_t eps = 2;
50+
void rasterize(float *pos, size_t n, float maxX, float maxY, std::string &screen,
51+
size_t screenWidth, size_t screenHeight) {
52+
static const char intensity[] = " .`'^-+=*x17X$8#%@";
53+
const size_t eps = 1;
5454
// iterate over screen
5555
for (size_t i = 0; i < screenHeight; ++i) {
56-
for (size_t j = 0; j < screenWidth; ++j) {
56+
for (size_t j = 0; j < screenWidth - 2; ++j) {
5757
int count = 0;
5858
for (size_t k = 0; k < 2 * n; k += 2) {
5959
float nx =
@@ -67,21 +67,22 @@ void render(float *pos, size_t n, float maxX, float maxY, size_t screenWidth,
6767
count++;
6868
}
6969
}
70-
count = std::min(count, 17);
71-
// printf("%d", n);
72-
printf("%c", reverse_intensity[count]);
70+
count = std::min(count / 2, 17); // Need to adjust this for N
71+
screen[i * screenWidth + j] = intensity[count];
7372
}
74-
printf("|\n");
75-
}
76-
for(size_t i = 0; i < screenWidth + 1; ++i) {
77-
printf("-");
73+
screen[i * screenWidth + screenWidth - 1] = '\n';
7874
}
75+
// clear screen
76+
printf("\033[2J\033[1;1H");
77+
printf("# simulations: %d\n%s", n / 2, screen.c_str());
7978
}
8079

8180
int main() {
8281
Context ctx = CreateContext();
8382

8483
// N can be quite a bit larger than this on most GPUs
84+
// At some point the inefficient rasterization code above will probably be
85+
// the bottleneck
8586
static constexpr size_t N = 1000;
8687

8788
// Since m1 = m2, no mass in the update equation
@@ -90,8 +91,8 @@ int main() {
9091
std::fill(v1Arr.begin(), v1Arr.end(), 0.0);
9192
std::fill(v2Arr.begin(), v2Arr.end(), 0.0);
9293
for (size_t i = 0; i < N; ++i) {
93-
theta1Arr[i] = 3.14159 / 2 + i * 3.14159 / N;
94-
theta2Arr[i] = 3.14159 / 2 + i * 3.14159 / N;
94+
theta1Arr[i] = 3.14159 / 2 + i * 3.14159 / 16 / N;
95+
theta2Arr[i] = 3.14159 / 2 + i * 3.14159 / 16 / N - 0.1;
9596
lengthArr[i] = 1.0 - i * 0.5 / N;
9697
}
9798
Tensor theta1 = CreateTensor(ctx, Shape{N}, kf32, theta1Arr.data());
@@ -103,28 +104,25 @@ int main() {
103104
std::array<float, 2 * 2 * N> posArr;
104105
Tensor pos = CreateTensor(ctx, Shape{N * 4}, kf32);
105106
Shape nThreads{N, 1, 1};
106-
ShaderCode shader = CreateShader(kShaderSimulation, 256, kf32);
107+
ShaderCode shader = CreateShader(kShaderUpdateSim, 256, kf32);
107108
printf("Shader code: %s\n", shader.data.c_str());
108109
Kernel update = CreateKernel(
109110
ctx, shader, TensorList{theta1, theta2, vel1, vel2, length, pos},
110111
nThreads);
111112

113+
std::string screen(80 * 40, ' ');
112114
while (true) {
113115
auto start = std::chrono::high_resolution_clock::now();
114116
std::promise<void> promise;
115117
std::future<void> future = promise.get_future();
116118
DispatchKernel(ctx, update, promise);
117119
ResetCommandBuffer(ctx.device, nThreads, update);
118120
Wait(ctx, future);
119-
120-
ToCPU(ctx, pos, posArr.data(), sizeof(pos));
121+
ToCPU(ctx, pos, posArr.data(), sizeof(posArr));
121122
auto end = std::chrono::high_resolution_clock::now();
122123
std::chrono::duration<double> elapsed = end - start;
123-
// printf("x1: %.2f, y1: %.2f\nx2: %.2f, y2: %.2f\n", pos1Arr[0],
124-
// pos1Arr[1],pos2Arr[0], pos2Arr[1]);
125-
printf("\033[2J\033[1;1H");
126-
// render(posArr.data(), N * 2, 2.0, 2.0, 40, 40);
127-
render(posArr.data(), N, 2.0, 2.0, 80, 40);
128-
std::this_thread::sleep_for(std::chrono::milliseconds(16) - elapsed);
124+
// N * 2 because there's two objects per pendulum
125+
rasterize(posArr.data(), N * 2, 2.0, 2.0, screen, 80, 40);
126+
std::this_thread::sleep_for(std::chrono::milliseconds(8) - elapsed);
129127
}
130128
}

gpu.h

+1-1
Original file line number | Diff line number | Diff line change
@@ -280,7 +280,7 @@ Tensor CreateTensor(TensorPool &pool, WGPUDevice &device, const Shape &shape,
280280
WGPUBufferUsageFlags usage = WGPUBufferUsage_Storage |
281281
WGPUBufferUsage_CopyDst |
282282
WGPUBufferUsage_CopySrc) {
283-
log(kDefLog, kInfo, "Creating tensor");
283+
log(kDefLog, kTrace, "Creating tensor");
284284
size_t numElements = 1;
285285
for (size_t dim = 0; dim < shape.rank; dim++) {
286286
numElements *= shape.data[dim];

0 commit comments

Comments
 (0)