Skip to content

Commit a1b31f7

Browse files
committed
Simplify shader API - use Create* convention for CreateShader, start gpu puzzles example, pin dawn build commit, add some checks to Makefile
1 parent 2223ea4 commit a1b31f7

File tree

7 files changed

+130
-120
lines changed

7 files changed

+130
-120
lines changed

CMakeLists.txt

+6
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,12 @@ IF (NOT WEBGPU_TAG)
3030
ENDIF()
3131
message(STATUS "Using WebGPU distribution tag: ${WEBGPU_TAG}")
3232

33+
IF (WEBGPU_TAG STREQUAL "dawn")
34+
# Pin the dawn backend to a specific commit
35+
set(WEBGPU_TAG "1025b977e1927b6d0327e67352f90feb4bcf8274")
36+
message(STATUS "Using Dawn backend")
37+
ENDIF()
38+
3339
FetchContent_Declare(
3440
webgpu
3541
GIT_REPOSITORY ${WEBGPU_DIST_GIT_REPO}

Makefile

+16-11
Original file line numberDiff line numberDiff line change
@@ -19,41 +19,46 @@ DEBUG_FLAGS = $(FLAGS) -DDEBUG:BOOL=ON
1919
EMSCRIPTEN_FLAGS = -DIMPLEMENTATION=emscripten -DCMAKE_CXX_COMPILER=em++
2020
LOCAL_FLAGS = -DUSE_LOCAL_LIBS=ON
2121

22-
demo:
22+
demo: check-dependencies
2323
mkdir -p build && cd build && cmake .. $(FASTBUILD_FLAGS) && make -j$(NUM_JOBS) $(TARGET_DEMO) && ./$(TARGET_DEMO)
2424

25-
tests:
25+
# check for the existence of clang++ and cmake
26+
check-dependencies:
27+
@command -v clang++ >/dev/null 2>&1 || { echo >&2 "Please install clang++ with 'sudo apt-get install clang' or 'brew install llvm'"; exit 1; }
28+
@command -v cmake >/dev/null 2>&1 || { echo >&2 "Please install cmake with 'sudo apt-get install cmake' or 'brew install cmake'"; exit 1; }
29+
30+
tests: check-dependencies
2631
mkdir -p build && cd build && cmake .. $(FASTBUILD_FLAGS) && make -j$(NUM_JOBS) $(TARGET_TESTS) && ./$(TARGET_TESTS)
2732

28-
libgpu:
33+
libgpu: check-dependencies
2934
mkdir -p build && cd build && cmake .. $(RELEASE_FLAGS) && make -j$(NUM_JOBS) gpu
3035

31-
debug:
36+
debug: check-dependencies
3237
mkdir -p build && cd build && cmake .. $(DEBUG_FLAGS) && make -j$(NUM_JOBS) $(TARGET_ALL)
3338

34-
build:
39+
build: check-dependencies
3540
mkdir -p build && cd build && cmake .. $(RELEASE_FLAGS) && make -j$(NUM_JOBS) $(TARGET_ALL)
3641

37-
emscripten:
42+
emscripten: check-dependencies
3843
mkdir -p build && cd build && cmake .. $(EMSCRIPTEN_FLAGS) -DIMPLEMENTATION=emscripten && make -j$(NUM_JOBS) $(TARGET_ALL)
3944

4045
check-entr:
4146
@command -v entr >/dev/null 2>&1 || { echo >&2 "Please install entr with 'brew install entr' or 'sudo apt-get install entr'"; exit 1; }
4247

43-
watch-demo: check-entr
48+
watch-demo: check-entr check-dependencies
4449
mkdir -p build && cd build && cmake .. $(FASTBUILD_FLAGS) && ls ../* ../utils/* | entr -s "rm -f $(TARGET_DEMO) && make -j$(NUM_JOBS) $(TARGET_DEMO) && ./$(TARGET_DEMO)"
4550

46-
watch-tests:
51+
watch-tests: check-entr check-dependencies
4752
mkdir -p build && cd build && cmake .. $(FASTBUILD_FLAGS) && ls ../* ../utils/* | entr -s "rm -f $(TARGET_TESTS) && make -j$(NUM_JOBS) $(TARGET_TESTS) && ./$(TARGET_TESTS)"
4853

4954
# experimental
50-
watch-tests-wgpu:
55+
watch-tests-wgpu: check-entr check-dependencies
5156
mkdir -p build && cd build && cmake .. $(FASTBUILD_FLAGS) $(USE_WGPU) && ls ../* ../utils/* | entr -s "rm -f $(TARGET_TESTS) && make -j$(NUM_JOBS) $(TARGET_TESTS) && ./$(TARGET_TESTS)"
5257

53-
watch-demo-local: check-entr
58+
watch-demo-local: check-entr check-dependencies
5459
mkdir -p build && cd build && cmake .. $(FASTBUILD_FLAGS) $(LOCAL_FLAGS) && ls ../* ../utils/* | entr -s "rm -f $(TARGET_DEMO) && make -j$(NUM_JOBS) $(TARGET_DEMO) && ./$(TARGET_DEMO)"
5560

56-
watch-tests-local:
61+
watch-tests-local: check-entr check-dependencies
5762
mkdir -p build && cd build && cmake .. $(FASTBUILD_FLAGS) $(LOCAL_FLAGS) && ls ../* ../utils/* | entr -s "rm -f $(TARGET_TESTS) && make -j$(NUM_JOBS) $(TARGET_TESTS) && ./$(TARGET_TESTS)"
5863

5964
clean-build:

README.md

+10
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,11 @@ The only dependency of this library is a WebGPU implementation. Currently we
8686
recommend using the Dawn backend until further testing, but we plan to support
8787
emscripten (web) and wgpu (native) backends.
8888

89+
You should have clang and cmake installed (we currently test with CMake 3.25+). On macOS,
90+
you can install cmake using [homebrew](https://brew.sh/) with: `brew install
91+
cmake`. On Ubuntu, you can install cmake using `apt-get` with: `sudo apt-get
92+
install cmake`.
93+
8994
The build is handled by cmake. Some useful common cmake invocations are wrapped
9095
in the convenience Makefile. To start you can try building a terminal demo
9196
tutorial which also tests the functionality of the library, this builds the
@@ -186,6 +191,10 @@ If you need to clean up the build artifacts, you can run:
186191
make clean
187192
```
188193

194+
## Troubleshooting
195+
196+
If you run into issues building the project, please open an issue.
197+
189198
## Motivation and Goals
190199

191200
Although gpu.cpp is intended for any form of general purpose GPU computation,
@@ -237,6 +246,7 @@ rendering/graphics on the GPU, although it might be useful for compute shaders
237246
in graphics projects - one of the examples is a small compute renderer,
238247
rendered to the terminal.
239248

249+
240250
## Contributing and Work-in-Progress
241251

242252
We welcome contributions! There's a lot of low hanging fruit - fleshing out

examples/hello_world/run.cpp

+21-4
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,27 @@
1-
#include <array>
2-
#include <cstdio>
31
#include "gpu.h"
42
#include "nn/shaders.h"
3+
#include <array>
4+
#include <cstdio>
55

66
using namespace gpu;
77

8+
static const char *kGelu = R"(
9+
const GELU_SCALING_FACTOR: f32 = 0.7978845608028654; // sqrt(2.0 / PI)
10+
@group(0) @binding(0) var<storage, read_write> inp: array<{{precision}}>;
11+
@group(0) @binding(1) var<storage, read_write> out: array<{{precision}}>;
12+
@compute @workgroup_size({{workgroupSize}})
13+
fn main(
14+
@builtin(global_invocation_id) GlobalInvocationID: vec3<u32>) {
15+
let i: u32 = GlobalInvocationID.x;
16+
// Ensure we do not access out of bounds
17+
if (i < arrayLength(&inp)) {
18+
let x: f32 = inp[i];
19+
let cube: f32 = 0.044715 * x * x * x;
20+
out[i] = 0.5 * x * (1.0 + tanh(GELU_SCALING_FACTOR * (x + cube)));
21+
}
22+
}
23+
)";
24+
825
int main(int argc, char **argv) {
926
GPUContext ctx = CreateGPUContext();
1027
fprintf(stdout, "\nHello, gpu.cpp\n\n");
@@ -17,8 +34,8 @@ int main(int argc, char **argv) {
1734
GPUTensor input = CreateTensor(ctx, {N}, kf32, inputArr.data());
1835
GPUTensor output = CreateTensor(ctx, {N}, kf32, outputArr.data());
1936

20-
Kernel op =
21-
CreateKernel(ctx, GeluShader(256, kf32), std::array{input}, output);
37+
Kernel op = CreateKernel(ctx, CreateShader(kGelu, 256, kf32),
38+
std::array{input}, output);
2239
DispatchKernel(ctx, op);
2340
Wait(ctx, op.future);
2441
ToCPU(ctx, output, outputArr.data(), sizeof(outputArr));

gpu.h

+46-28
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,8 @@
1111
#include <unordered_map>
1212
#include <vector>
1313

14-
#include "webgpu/webgpu.h"
1514
#include "utils/logging.h"
15+
#include "webgpu/webgpu.h"
1616

1717
namespace gpu {
1818

@@ -67,7 +67,7 @@ struct GPUTensor {
6767
};
6868

6969
struct TensorPool {
70-
TensorPool(GPUContext *ctx) : ctx(ctx), data(){};
70+
TensorPool(GPUContext *ctx) : ctx(ctx), data() {};
7171
GPUContext *ctx;
7272
std::unordered_map<WGPUBuffer, GPUTensor> data;
7373
~TensorPool();
@@ -121,9 +121,9 @@ const char *ToString(NumType type) {
121121

122122
/* Tensor factory function */
123123
GPUTensor CreateTensor(TensorPool &pool, const Shape &shape, NumType dtype,
124-
WGPUBufferUsageFlags usage = WGPUBufferUsage_Storage |
125-
WGPUBufferUsage_CopyDst |
126-
WGPUBufferUsage_CopySrc) {
124+
WGPUBufferUsageFlags usage = WGPUBufferUsage_Storage |
125+
WGPUBufferUsage_CopyDst |
126+
WGPUBufferUsage_CopySrc) {
127127
log(kDefLog, kInfo, "Creating tensor");
128128
size_t numElements = 1;
129129
for (size_t dim = 0; dim < shape.rank; dim++) {
@@ -146,16 +146,17 @@ GPUTensor CreateTensor(TensorPool &pool, const Shape &shape, NumType dtype,
146146
/* Syntactic sugar - take in ctx instead of pool*/
147147
GPUTensor CreateTensor(GPUContext &ctx, const Shape &shape, NumType dtype) {
148148
return CreateTensor(ctx.pool, shape, dtype,
149-
WGPUBufferUsage_Storage | WGPUBufferUsage_CopyDst |
150-
WGPUBufferUsage_CopySrc);
149+
WGPUBufferUsage_Storage | WGPUBufferUsage_CopyDst |
150+
WGPUBufferUsage_CopySrc);
151151
}
152152

153153
/* With Value Initialization (pointer) */
154154
GPUTensor CreateTensor(GPUContext &ctx, const Shape &shape, NumType dtype,
155-
float *data) {
156-
GPUTensor tensor = CreateTensor(ctx.pool, shape, dtype,
157-
WGPUBufferUsage_Storage | WGPUBufferUsage_CopyDst |
158-
WGPUBufferUsage_CopySrc);
155+
float *data) {
156+
GPUTensor tensor =
157+
CreateTensor(ctx.pool, shape, dtype,
158+
WGPUBufferUsage_Storage | WGPUBufferUsage_CopyDst |
159+
WGPUBufferUsage_CopySrc);
159160
wgpuQueueWriteBuffer(ctx.queue, tensor.data.buffer, 0, data,
160161
tensor.data.size);
161162
return tensor;
@@ -187,16 +188,33 @@ struct CallbackDataDyn {
187188
};
188189

189190
struct ShaderCode {
190-
std::string code;
191+
std::string data;
191192
size_t wgSize; // workgroup size
192193
};
193194

195+
/// Replaces every occurrence of `from` in `str` with `to`, in place.
///
/// Scanning resumes immediately after each inserted replacement, so
/// occurrences of `from` that appear inside `to` are never re-substituted
/// (e.g. replacing "a" with "aa" terminates).
///
/// @param str  string mutated in place
/// @param from pattern to search for; an empty pattern is treated as a
///             no-op (the unguarded loop would otherwise insert `to`
///             between every character of `str`)
/// @param to   replacement text (may be empty, shorter, or longer)
void ReplaceAll(std::string &str, const std::string &from,
                const std::string &to) {
  if (from.empty()) {
    return; // guard against degenerate empty-pattern matching
  }
  size_t start_pos = 0;
  while ((start_pos = str.find(from, start_pos)) != std::string::npos) {
    str.replace(start_pos, from.length(), to);
    start_pos += to.length(); // skip past the replacement text
  }
}
203+
204+
/// Builds a ShaderCode value from a raw WGSL template string, substituting
/// the {{workgroupSize}} and {{precision}} placeholders with the given
/// workgroup size and numeric-type name.
///
/// @param shaderRaw     WGSL source containing the template placeholders
/// @param workgroupSize workgroup size spliced into {{workgroupSize}} and
///                      recorded on the returned ShaderCode
/// @param precision     numeric type whose name replaces {{precision}}
/// @return fully substituted shader source plus its workgroup size
ShaderCode CreateShader(const char *shaderRaw, size_t workgroupSize,
                        NumType precision) {
  std::string source{shaderRaw};
  ReplaceAll(source, "{{workgroupSize}}", std::to_string(workgroupSize));
  ReplaceAll(source, "{{precision}}", ToString(precision));
  return {source, workgroupSize};
}
211+
194212
struct KernelDesc {
195213
const ShaderCode shader;
196214
const GPUTensor *inputs;
197215
size_t numInputs;
198216
const GPUTensor output;
199-
const void* params;
217+
const void *params;
200218
const size_t paramSize;
201219
};
202220

@@ -441,9 +459,9 @@ void ToGPU(GPUContext &ctx, const float *data, GPUTensor &tensor) {
441459
}
442460

443461
Kernel CreateKernel(GPUContext &ctx, const ShaderCode &shader,
444-
const GPUTensor *inputs, size_t numInputs,
445-
const GPUTensor &output, const void *params = nullptr,
446-
size_t paramsSize = 0) {
462+
const GPUTensor *inputs, size_t numInputs,
463+
const GPUTensor &output, const void *params = nullptr,
464+
size_t paramsSize = 0) {
447465
WGPUDevice device = ctx.device;
448466
WGPUQueue queue = ctx.queue;
449467
Kernel op;
@@ -591,7 +609,7 @@ Kernel CreateKernel(GPUContext &ctx, const ShaderCode &shader,
591609
pipelineLayout =
592610
wgpuDeviceCreatePipelineLayout(device, &pipelineLayoutDesc);
593611
WGPUShaderModuleWGSLDescriptor wgslDesc = {
594-
.code = shader.code.c_str(),
612+
.code = shader.data.c_str(),
595613
};
596614
wgslDesc.chain.sType = WGPUSType_ShaderModuleWGSLDescriptor;
597615
WGPUShaderModuleDescriptor shaderModuleDesc = {};
@@ -634,14 +652,14 @@ Kernel CreateKernel(GPUContext &ctx, const ShaderCode &shader,
634652

635653
template <typename ParamsType = NoParam>
636654
Kernel CreateKernel(GPUContext &ctx, const ShaderCode &shader,
637-
const GPUTensor *inputs, size_t numInputs,
638-
const GPUTensor &output,
639-
const ParamsType &params = ParamsType{}) {
655+
const GPUTensor *inputs, size_t numInputs,
656+
const GPUTensor &output,
657+
const ParamsType &params = ParamsType{}) {
640658
if constexpr (!IsNoParam<ParamsType>) {
641659
log(kDefLog, kInfo, "Using params of size %d bytes", sizeof(ParamsType));
642660
return CreateKernel(ctx, shader, inputs, numInputs, output,
643-
reinterpret_cast<const void *>(&params),
644-
sizeof(ParamsType));
661+
reinterpret_cast<const void *>(&params),
662+
sizeof(ParamsType));
645663
} else {
646664
log(kDefLog, kInfo, "No params");
647665
return CreateKernel(ctx, shader, inputs, numInputs, output, nullptr, 0);
@@ -653,11 +671,11 @@ Kernel CreateKernel(GPUContext &ctx, const ShaderCode &shader,
653671
*/
654672
template <typename ParamsType = NoParam, size_t numInputs>
655673
Kernel CreateKernel(GPUContext &ctx, const ShaderCode &shader,
656-
const std::array<GPUTensor, numInputs> &inputs,
657-
const GPUTensor &output,
658-
const ParamsType &params = ParamsType{}) {
659-
return CreateKernel<ParamsType>(ctx, shader, inputs.data(), numInputs,
660-
output, params);
674+
const std::array<GPUTensor, numInputs> &inputs,
675+
const GPUTensor &output,
676+
const ParamsType &params = ParamsType{}) {
677+
return CreateKernel<ParamsType>(ctx, shader, inputs.data(), numInputs, output,
678+
params);
661679
}
662680

663681
MultiKernel CreateMultiKernel(GPUContext &ctx, const MultiKernelDesc &desc) {
@@ -791,7 +809,7 @@ MultiKernel CreateMultiKernel(GPUContext &ctx, const MultiKernelDesc &desc) {
791809
// Create shader module
792810
log(kDefLog, kInfo, "Create shader module");
793811
WGPUShaderModuleWGSLDescriptor wgslDesc = {
794-
.code = desc.shader[shaderIndex].code.c_str(),
812+
.code = desc.shader[shaderIndex].data.c_str(),
795813
};
796814
wgslDesc.chain.sType = WGPUSType_ShaderModuleWGSLDescriptor;
797815
WGPUShaderModuleDescriptor shaderModuleDesc = {

0 commit comments

Comments
 (0)