@@ -8,9 +8,9 @@ using namespace gpu; // CreateContext, CreateTensor, CreateKernel,
8
8
// CreateShader, DispatchKernel, Wait, ToCPU
9
9
// Tensor, TensorList, Kernel, Context, Shape, kf32
10
10
11
- const char *kShaderSimulation = R"(
11
+ const char *kShaderUpdateSim = R"(
12
12
const G: f32 = 9.81;
13
- const dt: f32 = 0.04 ;
13
+ const dt: f32 = 0.03 ;
14
14
@group(0) @binding(0) var<storage, read_write> theta1: array<f32>;
15
15
@group(0) @binding(1) var<storage, read_write> theta2: array<f32>;
16
16
@group(0) @binding(2) var<storage, read_write> thetaVel1: array<f32>;
@@ -47,13 +47,13 @@ fn main(@builtin(global_invocation_id) global_id : vec3<u32>) {
47
47
}
48
48
)" ;
49
49
50
- void render (float *pos, size_t n, float maxX, float maxY, size_t screenWidth ,
51
- size_t screenHeight) {
52
- static const char reverse_intensity [] = " .`'^-+=*x17X$8#%@" ;
53
- const size_t eps = 2 ;
50
+ void rasterize (float *pos, size_t n, float maxX, float maxY, std::string &screen ,
51
+ size_t screenWidth, size_t screenHeight) {
52
+ static const char intensity [] = " .`'^-+=*x17X$8#%@" ;
53
+ const size_t eps = 1 ;
54
54
// iterate over screen
55
55
for (size_t i = 0 ; i < screenHeight; ++i) {
56
- for (size_t j = 0 ; j < screenWidth; ++j) {
56
+ for (size_t j = 0 ; j < screenWidth - 2 ; ++j) {
57
57
int count = 0 ;
58
58
for (size_t k = 0 ; k < 2 * n; k += 2 ) {
59
59
float nx =
@@ -67,21 +67,22 @@ void render(float *pos, size_t n, float maxX, float maxY, size_t screenWidth,
67
67
count++;
68
68
}
69
69
}
70
- count = std::min (count, 17 );
71
- // printf("%d", n);
72
- printf (" %c" , reverse_intensity[count]);
70
+ count = std::min (count / 2 , 17 ); // Need to adjust this for N
71
+ screen[i * screenWidth + j] = intensity[count];
73
72
}
74
- printf (" |\n " );
75
- }
76
- for (size_t i = 0 ; i < screenWidth + 1 ; ++i) {
77
- printf (" -" );
73
+ screen[i * screenWidth + screenWidth - 1 ] = ' \n ' ;
78
74
}
75
+ // clear screen
76
+ printf (" \033 [2J\033 [1;1H" );
77
+ printf (" # simulations: %d\n %s" , n / 2 , screen.c_str ());
79
78
}
80
79
81
80
int main () {
82
81
Context ctx = CreateContext ();
83
82
84
83
// N can be quite a bit larger than this on most GPUs
84
+ // At some point the inefficient rasterization code above will probably be
85
+ // the bottleneck
85
86
static constexpr size_t N = 1000 ;
86
87
87
88
// Since m1 = m2, no mass in the update equation
@@ -90,8 +91,8 @@ int main() {
90
91
std::fill (v1Arr.begin (), v1Arr.end (), 0.0 );
91
92
std::fill (v2Arr.begin (), v2Arr.end (), 0.0 );
92
93
for (size_t i = 0 ; i < N; ++i) {
93
- theta1Arr[i] = 3.14159 / 2 + i * 3.14159 / N;
94
- theta2Arr[i] = 3.14159 / 2 + i * 3.14159 / N ;
94
+ theta1Arr[i] = 3.14159 / 2 + i * 3.14159 / 16 / N;
95
+ theta2Arr[i] = 3.14159 / 2 + i * 3.14159 / 16 / N - 0.1 ;
95
96
lengthArr[i] = 1.0 - i * 0.5 / N;
96
97
}
97
98
Tensor theta1 = CreateTensor (ctx, Shape{N}, kf32, theta1Arr.data ());
@@ -103,28 +104,25 @@ int main() {
103
104
std::array<float , 2 * 2 * N> posArr;
104
105
Tensor pos = CreateTensor (ctx, Shape{N * 4 }, kf32);
105
106
Shape nThreads{N, 1 , 1 };
106
- ShaderCode shader = CreateShader (kShaderSimulation , 256 , kf32);
107
+ ShaderCode shader = CreateShader (kShaderUpdateSim , 256 , kf32);
107
108
printf (" Shader code: %s\n " , shader.data .c_str ());
108
109
Kernel update = CreateKernel (
109
110
ctx, shader, TensorList{theta1, theta2, vel1, vel2, length, pos},
110
111
nThreads);
111
112
113
+ std::string screen (80 * 40 , ' ' );
112
114
while (true ) {
113
115
auto start = std::chrono::high_resolution_clock::now ();
114
116
std::promise<void > promise;
115
117
std::future<void > future = promise.get_future ();
116
118
DispatchKernel (ctx, update, promise);
117
119
ResetCommandBuffer (ctx.device , nThreads, update);
118
120
Wait (ctx, future);
119
-
120
- ToCPU (ctx, pos, posArr.data (), sizeof (pos));
121
+ ToCPU (ctx, pos, posArr.data (), sizeof (posArr));
121
122
auto end = std::chrono::high_resolution_clock::now ();
122
123
std::chrono::duration<double > elapsed = end - start;
123
- // printf("x1: %.2f, y1: %.2f\nx2: %.2f, y2: %.2f\n", pos1Arr[0],
124
- // pos1Arr[1],pos2Arr[0], pos2Arr[1]);
125
- printf (" \033 [2J\033 [1;1H" );
126
- // render(posArr.data(), N * 2, 2.0, 2.0, 40, 40);
127
- render (posArr.data (), N, 2.0 , 2.0 , 80 , 40 );
128
- std::this_thread::sleep_for (std::chrono::milliseconds (16 ) - elapsed);
124
+ // N * 2 because there's two objects per pendulum
125
+ rasterize (posArr.data (), N * 2 , 2.0 , 2.0 , screen, 80 , 40 );
126
+ std::this_thread::sleep_for (std::chrono::milliseconds (8 ) - elapsed);
129
127
}
130
128
}
0 commit comments