#include "gpu.h"

#include <array>
#include <cstdio>
#include <future>

using namespace gpu; // createContext, createTensor, createKernel,
                     // createShader, dispatchKernel, wait, toCPU
                     // Tensor, Kernel, Context, Shape, kf32
9+
// WGSL compute shader: out[i] = tan(inp[i]), element-wise.
// {{precision}} and {{workgroupSize}} are gpu.cpp template placeholders,
// substituted when the kernel is created (here: f32 and 256).
// The bounds check guards the tail workgroup when the array length is
// not a multiple of the workgroup size.
static const char *kTan = R"(
@group(0) @binding(0) var<storage, read_write> inp: array<{{precision}}>;
@group(0) @binding(1) var<storage, read_write> out: array<{{precision}}>;
@compute @workgroup_size({{workgroupSize}})
fn main(
  @builtin(global_invocation_id) GlobalInvocationID: vec3<u32>) {
    let i: u32 = GlobalInvocationID.x;
    if (i < arrayLength(&inp)) {
      let x: f32 = inp[i];
      out[i] = tan(x);
    }
  }
)";
23+
24+ int main (int argc, char **argv) {
25+ printf (" \033 [2J\033 [1;1H" );
26+ printf (" \n Hello gpu.cpp!\n " );
27+ printf (" --------------\n\n " );
28+
29+ Context ctx = createContext ();
30+ static constexpr size_t N = 100000 ;
31+ std::array<float , N> inputArr, outputArr;
32+ for (int i = 0 ; i < N; ++i) {
33+ inputArr[i] = static_cast <float >(i) / 10.0 ; // dummy input data
34+ }
35+ Tensor input = createTensor (ctx, Shape{N}, kf32, inputArr.data ());
36+ Tensor output = createTensor (ctx, Shape{N}, kf32);
37+ std::promise<void > promise;
38+ std::future<void > future = promise.get_future ();
39+ Kernel op = createKernel (ctx, {kTan , 256 , kf32},
40+ Bindings{input, output},
41+ /* nWorkgroups */ {cdiv (N, 256 ), 1 , 1 });
42+ dispatchKernel (ctx, op, promise);
43+ wait (ctx, future);
44+ toCPU (ctx, output, outputArr.data (), sizeof (outputArr));
45+ for (int i = 0 ; i < 1000 ; ++i) {
46+ printf (" tan(%.2f) = %.10f\n " , inputArr[i], outputArr[i]);
47+ }
48+ printf (" ...\n\n " );
49+ printf (" Computed %zu values of tan(x)\n\n " , N);
50+ return 0 ;
51+ }