|
#include "gpu.h"
#include <array>
#include <chrono>
#include <cstdio>
#include <thread>

using namespace gpu; // CreateContext, CreateTensor, CreateKernel,
                     // CreateShader, DispatchKernel, Wait, ToCPU,
                     // Tensor, TensorList, Kernel, Context, Shape, kf32

const char *kShaderSimulation = R"(
const G: f32 = 9.81;
const dt: f32 = 0.01;
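// State buffers, one entry per simulated system: positions and velocities
// of two bodies, plus a length and a mass.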
@group(0) @binding(0) var<storage, read_write> pos1: array<{{precision}}>;
@group(0) @binding(1) var<storage, read_write> vel1: array<{{precision}}>;
@group(0) @binding(2) var<storage, read_write> pos2: array<{{precision}}>;
@group(0) @binding(3) var<storage, read_write> vel2: array<{{precision}}>;
@group(0) @binding(4) var<storage, read_write> length: array<{{precision}}>;
@group(0) @binding(5) var<storage, read_write> mass: array<{{precision}}>;
@group(0) @binding(6) var<storage, read_write> output: array<{{precision}}>;
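// `output` is currently unused by the kernel body (see the TODO on the
// host side).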

@compute @workgroup_size({{workgroupSize}})
fn main(
    @builtin(global_invocation_id) GlobalInvocationID: vec3<u32>) {
  let i: u32 = GlobalInvocationID.x;
  if (i < arrayLength(&pos1)) {
    // TODO
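    // e.g. read the state for index i, integrate one dt step under gravity
    // G (semi-implicit Euler or similar), and write the updated values back.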

  }
}
)";

int main() {
  printf("\nHello, gpu.cpp\n\n");
  Context ctx = CreateContext();
  static constexpr size_t N = 1000;

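  // Host-side buffers holding the initial state of the N simulations
  // (left uninitialized for now).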
  std::array<float, N> x1Arr, x2Arr, y1Arr, y2Arr, vx1Arr, vy1Arr, vx2Arr, vy2Arr, lengthArr, massArr;

  Tensor pos1 = CreateTensor(ctx, Shape{N}, kf32, x1Arr.data());
  Tensor pos2 = CreateTensor(ctx, Shape{N}, kf32, x2Arr.data());
  Tensor vel1 = CreateTensor(ctx, Shape{N}, kf32, vx1Arr.data());
  Tensor vel2 = CreateTensor(ctx, Shape{N}, kf32, vx2Arr.data());
  Tensor length = CreateTensor(ctx, Shape{N}, kf32, lengthArr.data());
  Tensor mass = CreateTensor(ctx, Shape{N}, kf32, massArr.data());

  // TODO: the shader updates state in place, so this separate output tensor
  // should not be needed.
  Tensor output = CreateTensor(ctx, Shape{N}, kf32);

  Shape nThreads{N, 1, 1};
  Kernel update = CreateKernel(
      ctx, CreateShader(kShaderSimulation, 256, kf32),
      TensorList{pos1, vel1, pos2, vel2, length, mass}, output, nThreads);
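  // Simulation loop: re-record the command buffer, run one step on the GPU,
  // then pace the loop to roughly 16 ms per iteration (~60 steps per second).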
  while (true) {
    auto start = std::chrono::high_resolution_clock::now();
    ResetCommandBuffer(ctx.device, nThreads, update);

    DispatchKernel(ctx, update);
    Wait(ctx, update.future);
    auto end = std::chrono::high_resolution_clock::now();
    std::chrono::duration<double> elapsed = end - start;
    std::this_thread::sleep_for(std::chrono::milliseconds(16) - elapsed);
  }

}