@@ -11,31 +11,46 @@ using namespace gpu; // CreateContext, CreateTensor, CreateKernel,
11
11
// WGSL compute-shader source for the double-pendulum simulation.
//
// One invocation handles one pendulum. `{{precision}}` and
// `{{workgroupSize}}` are template placeholders; main() hands this string to
// CreateShader together with a workgroup size and a numeric type, which
// presumably substitutes them (confirm against CreateShader).
//
// Buffer layout expected by the shader:
//   - pos1, vel1, pos2, vel2: 2 entries per pendulum, x at [2*i], y at [2*i+1]
//   - length, mass:           1 entry per pendulum
const char *kShaderSimulation = R"(
const G: f32 = 9.81;
const dt: f32 = 0.01;

// size = 2 * # of pendulums (x and y values are adjacent)
@group(0) @binding(0) var<storage, read_write> pos1: array<{{precision}}>;
@group(0) @binding(1) var<storage, read_write> vel1: array<{{precision}}>;
@group(0) @binding(2) var<storage, read_write> pos2: array<{{precision}}>;
@group(0) @binding(3) var<storage, read_write> vel2: array<{{precision}}>;

// size = # of pendulums
@group(0) @binding(4) var<storage, read_write> length: array<{{precision}}>;
@group(0) @binding(5) var<storage, read_write> mass: array<{{precision}}>;

@compute @workgroup_size({{workgroupSize}})
fn main(
    @builtin(global_invocation_id) GlobalInvocationID: vec3<u32>) {
    // Pendulum index; also the index into the per-pendulum arrays.
    let i: u32 = GlobalInvocationID.x;
    // Index of the x component in the coordinate arrays; y lives at ic + 1.
    let ic: u32 = i * 2u;

    // Guard against the per-pendulum array length (not the 2x-sized
    // coordinate arrays) so padding threads of the last workgroup do not
    // index length/mass out of range, and make sure the y component at
    // ic + 1 is inside the coordinate arrays as well.
    // NOTE(review): assumes mass has the same length as length, and that
    // vel1/pos2/vel2 have the same length as pos1 - confirm on the host side.
    if (i < arrayLength(&length) && ic + 1u < arrayLength(&pos1)) {
        // Types are inferred from the storage arrays so the body stays valid
        // for whatever {{precision}} is instantiated with.
        let x1 = pos1[ic];
        let y1 = pos1[ic + 1u];
        let vx1 = vel1[ic];
        let vy1 = vel1[ic + 1u];
        let x2 = pos2[ic];
        let y2 = pos2[ic + 1u];
        let vx2 = vel2[ic];
        let vy2 = vel2[ic + 1u];
        let l = length[i];
        let m = mass[i];

    }
}
)";
32
47
33
48
int main () {
34
- printf (" \n Hello, gpu.cpp\n\n " );
35
49
Context ctx = CreateContext ();
36
50
static constexpr size_t N = 1000 ;
37
51
38
- std::array<float , N> x1Arr, x2Arr, y1Arr, y2Arr, vx1Arr, vy1Arr, vx2Arr, vy2Arr, lengthArr, massArr;
52
+ std::array<float , N> x1Arr, x2Arr, y1Arr, y2Arr, vx1Arr, vy1Arr, vx2Arr,
53
+ vy2Arr, lengthArr, massArr;
39
54
40
55
Tensor pos1 = CreateTensor (ctx, Shape{N}, kf32, x1Arr.data ());
41
56
Tensor pos2 = CreateTensor (ctx, Shape{N}, kf32, x2Arr.data ());
@@ -45,11 +60,9 @@ int main() {
45
60
Tensor mass = CreateTensor (ctx, Shape{N}, kf32, massArr.data ());
46
61
47
62
Shape nThreads{N, 1 , 1 };
48
- Kernel update = CreateKernel (
49
- ctx, CreateShader (kShaderSimulation , 256 , kf32),
50
- TensorList{pos1, vel1, pos2, vel2,
51
- length, mass},
52
- nThreads);
63
+ Kernel update =
64
+ CreateKernel (ctx, CreateShader (kShaderSimulation , 256 , kf32),
65
+ TensorList{pos1, vel1, pos2, vel2, length, mass}, nThreads);
53
66
while (true ) {
54
67
auto start = std::chrono::high_resolution_clock::now ();
55
68
ResetCommandBuffer (ctx.device , nThreads, update);
@@ -61,5 +74,4 @@ int main() {
61
74
std::chrono::duration<double > elapsed = end - start;
62
75
std::this_thread::sleep_for (std::chrono::milliseconds (16 ) - elapsed);
63
76
}
64
-
65
77
}
0 commit comments