Skip to content

Commit cfdbcc6

Browse files
authored
Merge pull request #74 from AnswerDotAI/dev
0.2.0 Release
2 parents 9a42592 + 6447d85 commit cfdbcc6

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

59 files changed

+6193
-5101
lines changed

.gitmodules

-4
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,3 @@
1-
[submodule "third_party/local/WebGPU-distribution"]
2-
path = third_party/local/WebGPU-distribution
3-
url = https://github.com/eliemichel/WebGPU-distribution.git
4-
branch = dawn
51
[submodule "third_party/llm.c"]
62
path = third_party/llm.c
73
url = https://github.com/karpathy/llm.c

Makefile

+48-10
Original file line numberDiff line numberDiff line change
@@ -19,13 +19,42 @@ pch:
1919
mkdir -p build && $(CXX) -std=c++17 $(INCLUDES) -x c++-header gpu.hpp -o build/gpu.hpp.pch
2020

2121
# TODO(avh): change extension based on platform
22-
lib:
23-
mkdir -p build && $(CXX) -std=c++17 $(INCLUDES) -L$(LIBDIR) -ldawn -ldl -shared -fPIC gpu.cpp -o build/libgpucpp.dylib
22+
# Get the current OS name
23+
OS = $(shell uname | tr -d '\n')
24+
# Set the specific variables for each platform
25+
LIB_PATH ?= /usr/lib
26+
HEADER_PATH ?= /usr/include
27+
ifeq ($(OS), Linux)
28+
OS_TYPE ?= Linux
29+
GPU_CPP_LIB_NAME ?= libgpucpp.so
30+
DAWN_LIB_NAME ?= libwebgpu_dawn.so
31+
else ifeq ($(OS), Darwin)
32+
OS_TYPE ?= macOS
33+
GPU_CPP_LIB_NAME ?= libgpucpp.dylib
34+
DAWN_LIB_NAME ?= libwebgpu_dawn.dylib
35+
else
36+
OS_TYPE ?= unknown
37+
endif
38+
39+
lib: check-clang dawnlib
40+
mkdir -p build && $(CXX) -std=c++17 $(INCLUDES) -L$(LIBDIR) -lwebgpu_dawn -ldl -shared -fPIC gpu.cpp -o build/$(GPU_CPP_LIB_NAME)
41+
python3 build.py
42+
cp third_party/lib/$(DAWN_LIB_NAME) build/
43+
44+
install:
45+
cp build/$(GPU_CPP_LIB_NAME) $(LIB_PATH)
46+
cp build/$(DAWN_LIB_NAME) $(LIB_PATH)
47+
cp build/gpu.hpp $(HEADER_PATH)
48+
49+
uninstall:
50+
rm $(LIB_PATH)/$(GPU_CPP_LIB_NAME)
51+
rm $(LIB_PATH)/$(DAWN_LIB_NAME)
52+
rm $(HEADER_PATH)/gpu.hpp
2453

2554
examples/hello_world/build/hello_world: check-clang dawnlib examples/hello_world/run.cpp check-linux-vulkan
2655
$(LIBSPEC) && cd examples/hello_world && make build/hello_world && ./build/hello_world
2756

28-
dawnlib: $(if $(wildcard third_party/lib/libdawn.so third_party/lib/libdawn.dylib),,run_setup)
57+
dawnlib: $(if $(wildcard third_party/lib/libwebgpu_dawn.so third_party/lib/libwebgpu_dawn.dylib),,run_setup)
2958

3059
run_setup: check-python
3160
python3 setup.py
@@ -42,7 +71,7 @@ all: dawnlib check-clang check-linux-vulkan lib pch
4271

4372
# Test 16-bit floating point type
4473
test-half: dawnlib check-clang
45-
$(LIBSPEC) && clang++ -std=c++17 $(INCLUDES) numeric_types/half.cpp -L$(LIBDIR) -ldawn -ldl -o build/half && ./build/half
74+
$(LIBSPEC) && clang++ -std=c++17 $(INCLUDES) numeric_types/half.cpp -L$(LIBDIR) -lwebgpu_dawn -ldl -o build/half && ./build/half
4675

4776
docs: Doxyfile
4877
doxygen Doxyfile
@@ -73,7 +102,7 @@ all-cmake: check-clang check-cmake
73102
################################################################################
74103

75104
clean-dawnlib:
76-
rm -f third_party/lib/libdawn.so third_party/lib/libdawn.dylib
105+
rm -f third_party/lib/libwebgpu_dawn.so third_party/lib/libwebgpu_dawn.dylib
77106

78107
clean:
79108
read -r -p "This will delete the contents of build/*. Are you sure? [CTRL-C to abort] " response && rm -rf build/*
@@ -90,21 +119,30 @@ clean:
90119
rm -f build/half
91120

92121
clean-all:
93-
read -r -p "This will delete the contents of build/* and third_party/*. Are you sure? [CTRL-C to abort] " response && rm -rf build/* third_party/fetchcontent/* third_party/gpu-build third_party/gpu-subbuild third_party/gpu-src third_party/lib/libdawn.so third_party/lib/libdawn.dylib
122+
read -r -p "This will delete the contents of build/* and third_party/*. Are you sure? [CTRL-C to abort] " response && rm -rf build/* third_party/fetchcontent/* third_party/gpu-build third_party/gpu-subbuild third_party/gpu-src third_party/lib/libwebgpu_dawn.so third_party/lib/libwebgpu_dawn.dylib
94123

95124
################################################################################
96125
# Checks
97126
################################################################################
98127

128+
# Check all
129+
check-all: check-os check-clang check-cmake check-python
130+
131+
# check the os
132+
check-os:
133+
ifeq ($(OS_TYPE), unknown)
134+
$(error Unsupported operating system)
135+
endif
136+
99137
# check for the existence of clang++ and cmake
100138
check-clang:
101-
@command -v clang++ >/dev/null 2>&1 || { echo >&2 "Please install clang++ with 'sudo apt-get install clang' or 'brew install llvm'"; exit 1; }
139+
@command -v clang++ >/dev/null 2>&1 || { echo -e >&2 "Clang++ is not installed. Please install clang++ to continue.\nOn Debian / Ubuntu: 'sudo apt-get install clang' or 'brew install llvm'\nOn Centos: 'sudo yum install clang'"; exit 1; }
102140

103141
check-cmake:
104-
@command -v cmake >/dev/null 2>&1 || { echo >&2 "Please install cmake with 'sudo apt-get install cmake' or 'brew install cmake'"; exit 1; }
142+
@command -v cmake >/dev/null 2>&1 || { echo -e >&2 "Cmake is not installed. Please install cmake to continue.\nOn Debian / Ubuntu: 'sudo apt-get install cmake' or 'brew install cmake'\nOn Centos: 'sudo yum install cmake'"; exit 1; }
105143

106144
check-python:
107-
@command -v python3 >/dev/null 2>&1 || { echo >&2 "Python needs to be installed and in your path."; exit 1; }
145+
@command -v python3 >/dev/null 2>&1 || { echo -e >&2 "Python is not installed. Please install python to continue.\nOn Debian / Ubuntu: 'sudo apt-get install python'\nOn Centos: 'sudo yum install python'"; exit 1; }
108146

109147
check-linux-vulkan:
110148
@echo "Checking system type and Vulkan availability..."
@@ -113,7 +151,7 @@ check-linux-vulkan:
113151
echo "Vulkan is installed."; \
114152
vulkaninfo; \
115153
else \
116-
echo "Vulkan is not installed. Please install Vulkan drivers to continue. On Debian / Ubuntu: sudo apt install libvulkan1 mesa-vulkan-drivers vulkan-tools"; \
154+
echo -e "Vulkan is not installed. Please install Vulkan drivers to continue.\nOn Debian / Ubuntu: 'sudo apt install libvulkan1 mesa-vulkan-drivers vulkan-tools'.\nOn Centos: 'sudo yum install vulkan vulkan-tools.'"; \
117155
exit 1; \
118156
fi \
119157
else \

README.md

+4-12
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ GPU code in C++ projects and have it run on Nvidia, Intel, AMD, and other GPUs.
88
The same C++ code can work on a wide variety of laptops, workstations, mobile
99
devices or virtually any hardware with Vulkan, Metal, or DirectX support.
1010

11-
## Technical Objectives: Lightweight, Fast Iteration, and Low Boilerplate
11+
## Objectives: Lightweight, Fast Iteration, and Low Boilerplate
1212

1313
With gpu.cpp we want to enable a high-leverage library for individual developers and researchers to incorporate GPU computation into programs relying on nothing more than a standard C++ compiler as tooling. Our goals are:
1414

@@ -189,7 +189,7 @@ illustrate how to use gpu.cpp as a library.
189189
190190
After you have run `make` in the top-level directory which retrieves the prebuilt Dawn shared library, you can run each example by navigating to its directory and running `make` from the example's directory.
191191
192-
An example of tiled matrix multiplication is in [examples/matmul](https://github.com/AnswerDotAI/gpu.cpp/blob/main/examples/matmul/). This implements a WebGPU version of the first few kernels of Simon Boehm's [How to Optimize a CUDA Matmul Kernel for cuBLAS-like Performance: a Worklog](https://siboehm.com/articles/22/CUDA-MMM) post. It currently runs at ~ 2.5+ TFLOPs on a Macbook Pro M1 Max laptop, which has a theoretical peak of 10.4 TFLOPs. Contributions to optimize this further are welcome.
192+
An example of tiled matrix multiplication is in [examples/matmul](https://github.com/AnswerDotAI/gpu.cpp/blob/main/examples/matmul/). This implements a WebGPU version of the first few kernels of Simon Boehm's [How to Optimize a CUDA Matmul Kernel for cuBLAS-like Performance: a Worklog](https://siboehm.com/articles/22/CUDA-MMM) post. It currently runs at ~ 3.5+ TFLOPs on a Macbook Pro M1 Max laptop. Contributions to optimize this further are welcome.
193193
194194
A parallel physics simulation of an ensemble of double pendulums simulated in parallel with different initial conditions on the GPU is shown in [examples/physics](https://github.com/AnswerDotAI/gpu.cpp/tree/main/examples/physics).
195195
@@ -198,9 +198,7 @@ A parallel physics simulation of an ensemble of double pendulums simulated in pa
198198
<img src="docs/images/pendulum.gif" alt="physics example animated gif" width=42%>
199199
</div>
200200
201-
We also show some examples of signed distance function computations, rendered in the terminal as ascii. A 3D SDF of spheres is shown in [examples/render](https://github.com/AnswerDotAI/gpu.cpp/tree/main/examples/render]) and a shadertoy-like live-reloading example is in [examples/shadertui](https://github.com/AnswerDotAI/gpu.cpp/tree/main/examples/shadertui).
202-
203-
Interestingly, given a starting example, LLMs such as Claude 3.5 Sonnet can be quite capable at writing low-level WGSL code for you - the other shaders in the shadertui example are written by the LLM.
201+
We also show some examples of signed distance function computations, rendered in the terminal as ascii. A 3D SDF of spheres is shown in [examples/render](https://github.com/AnswerDotAI/gpu.cpp/tree/main/examples/render) and a shadertoy-like live-reloading example is in [examples/shadertui](https://github.com/AnswerDotAI/gpu.cpp/tree/main/examples/shadertui).
204202
205203
<div align="center">
206204
<img src="docs/images/shadertui.gif" alt="shadertui example animated gif" width=88%>
@@ -232,22 +230,16 @@ gpu.cpp lets us implement and drop-in any algorithm with fine-grained control of
232230
233231
gpu.cpp is meant for developers with some familiarity with C++ and GPU programming. It is not a high-level numerical computing or machine learning framework or inference engine, though it can be used in support of such implementations.
234232
235-
Second, in spite of the name, WebGPU has native implementations decoupled from the web and the browser. gpu.cpp leverages WebGPU as a portable _native_ GPU API first and foremost, with the possibility of running in the browser being a convenient additional benefit in the future.
236-
237-
If you find it counterintuitive, as many do, that WebGPU is a native technology and not just for the web, watch Elie Michel's excellent talk ["WebGPU is Not Just About the Web"](https://www.youtube.com/watch?v=qHrx41aOTUQ).
233+
Second, in spite of the name, WebGPU has native implementations decoupled from the web and the browser. If you find it counterintuitive, watch Elie Michel's excellent talk ["WebGPU is Not Just About the Web"](https://www.youtube.com/watch?v=qHrx41aOTUQ).
238234
239235
Finally, the focus of gpu.cpp is general-purpose GPU computation rather than rendering/graphics on the GPU, although it can be useful for offline rendering or video processing use cases. We may explore directions with graphics in the future, but for now our focus is GPU compute.
240236
241237
## Limitations and Upcoming Features
242238
243-
_API Improvements_ - gpu.cpp is a work-in-progress and there are many features and improvements to come. At this early stage, we expect the API design to evolve as we identify improvements / needs from use cases. In particular, the handling of structured parameters and asynchronous dispatch will undergo refinement and maturation in the short-term.
244-
245239
_Browser Targets_ - In spite of using WebGPU we haven't tested builds targeting the browser yet though this is a short-term priority.
246240
247241
_Reusable Kernel Library_ - Currently the core library is strictly the operations and types for interfacing with the WebGPU API, with some specific use case example WGSL implementations in `examples/`. Over time, as kernel implementations mature we may migrate some of the reusable operations from specific examples into a small reusable kernel library.
248242
249-
_More Use Case Examples and Tests_ - Expect an iteration loop of use cases to design tweaks and improvements, which in turn make the use cases cleaner and easier to write. One short term use cases to flesh out the kernels from [llm.c](https://github.com/karpathy/llm.c) in WebGPU form. As these mature into a reusable kernel library, we hope to help realize the potential for WebGPU compute in AI.
250-
251243
## Troubleshooting
252244
253245
If you run into issues building the project, please open an issue.

bindings/haskell/CHANGELOG.md

+5
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
# Revision history for gpu-cpp
2+
3+
## 0.1.0.0 -- 2024-12-28
4+
5+
* First version.

bindings/haskell/Makefile

+3
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
all:
2+
cabal configure --extra-include-dirs=$(PWD)/../.. --extra-include-dirs=$(PWD)/../../third_party/headers --extra-lib-dirs=$(PWD)/../../third_party/lib
3+
cabal build .

bindings/haskell/app/Main.hs

+37
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
module Main where
2+
3+
import GpuCpp.Types
4+
import GpuCpp
5+
import qualified Data.Vector.Storable as V
6+
import Foreign.C.Types
7+
8+
main :: IO ()
9+
main = do
10+
context <- createContext
11+
input <- createTensor context [12] kf32
12+
output <- createTensor context [12] kf32
13+
kernelCode <- createKernelCode
14+
(
15+
"const GELU_SCALING_FACTOR: f32 = 0.7978845608028654; // sqrt(2.0 / PI)\n" <>
16+
"@group(0) @binding(0) var<storage, read_write> inp: array<{{precision}}>;\n" <>
17+
"@group(0) @binding(1) var<storage, read_write> out: array<{{precision}}>;\n" <>
18+
"@group(0) @binding(1) var<storage, read_write> dummy: array<{{precision}}>;\n" <>
19+
"@compute @workgroup_size({{workgroupSize}})\n" <>
20+
"fn main(\n" <>
21+
" @builtin(global_invocation_id) GlobalInvocationID: vec3<u32>) {\n" <>
22+
" let i: u32 = GlobalInvocationID.x;\n" <>
23+
" if (i < arrayLength(&inp)) {\n" <>
24+
" let x: f32 = inp[i];\n" <>
25+
" out[i] = select(0.5 * x * (1.0 + tanh(GELU_SCALING_FACTOR \n" <>
26+
" * (x + .044715 * x * x * x))), x, x > 10.0);\n" <>
27+
" }\n" <>
28+
"}\n"
29+
)
30+
256
31+
kf32
32+
kernel <- createKernel context kernelCode [input, output] [0,0] [12,1,1]
33+
toGpu context (V.fromList [1 :: CFloat,2,3,4,1,2,3,4,1,2,3,4]) input
34+
async <- dispatchKernel context kernel
35+
wait context async
36+
vec <- toCpu context output :: IO (V.Vector CFloat)
37+
print vec

bindings/haskell/gpu-cpp.cabal

+49
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
cabal-version: 3.0
2+
name: gpu-cpp
3+
version: 0.1.0.0
4+
license: BSD-3-Clause
5+
author: Junji Hashimoto
6+
maintainer: [email protected]
7+
category: Math
8+
build-type: Simple
9+
10+
extra-doc-files: CHANGELOG.md
11+
12+
common warnings
13+
ghc-options: -Wall
14+
15+
library
16+
import: warnings
17+
exposed-modules: GpuCpp
18+
, GpuCpp.Types
19+
build-depends: base ^>=4.18.1.0
20+
, inline-c
21+
, inline-c-cpp
22+
, containers
23+
, template-haskell
24+
, safe-exceptions
25+
, vector
26+
hs-source-dirs: src
27+
default-language: Haskell2010
28+
ghc-options: -optcxx-std=c++17
29+
extra-libraries: webgpu_dawn
30+
31+
executable gpu-cpp
32+
import: warnings
33+
main-is: Main.hs
34+
build-depends: base ^>=4.18.1.0
35+
, gpu-cpp
36+
, vector
37+
hs-source-dirs: app
38+
default-language: Haskell2010
39+
40+
test-suite gpu-cpp-test
41+
import: warnings
42+
default-language: Haskell2010
43+
type: exitcode-stdio-1.0
44+
hs-source-dirs: test
45+
main-is: Main.hs
46+
build-depends: base ^>=4.18.1.0
47+
, gpu-cpp
48+
, vector
49+
, hspec

0 commit comments

Comments (0)