Commit a0859ca

update web targets to match native targets in passing context into ops
1 parent e58f612 commit a0859ca

File tree

4 files changed (+13, -30 lines):
  experimental/kernels/Makefile
  experimental/kernels/gpt2_webgpu.cpp
  experimental/kernels/ops.cpp
  experimental/kernels/ops.hpp

experimental/kernels/Makefile

Lines changed: 10 additions & 23 deletions

@@ -15,7 +15,8 @@ endif
 # EMFLAGS=-std=c++17 -I$(GPUCPP) -I$(GPUCPP)/third_party/headers/wasm -I. -Iunittest_llmc -Illm.c -s USE_WEBGPU=1 -s -s STACK_SIZE=100000 -s MEMORY64=1 -s ALLOW_MEMORY_GROWTH=1
 EMFLAGS=-std=c++17 -I$(GPUCPP) -I$(GPUCPP)/third_party/headers/wasm -I. -Iunittest_llmc -Illm.c -s USE_WEBGPU=1 -s ASYNCIFY=1 -s STACK_SIZE=100000 -s MEMORY64=1 -s ALLOW_MEMORY_GROWTH=1
 CXXFLAGS=-std=c++17 -I$(GPUCPP) -I$(GPUCPP)/third_party/headers -I. -Iunittest_llmc
-CFLAGS=-Ofast -march=native -I. -Iunittest_llmc
+# CFLAGS=-Ofast -march=native -I. -Iunittest_llmc
+CFLAGS=-O2 -march=native -I. -Iunittest_llmc

 LDFLAGS=$(STDLIB) -L$(GPUCPP)/third_party/lib -ldl -ldawn
 FLAGS=$(CXXFLAGS) $(LDFLAGS)

@@ -91,48 +92,34 @@ build/train_gpt2: llm.c build/unittest_kernels.o gpt2_124M.bin
 	$(CC) $(CFLAGS) $(LDFLAGS) -o $@ llm.c/train_gpt2.c build/unittest_kernels.o

 build/ops.o: ops.cpp ops.hpp kernels.h
-	mkdir -p build && $(CXX) $(CXXFLAGS) -g -c -o $@ $<
-
-# build/gpt2_webgpu: llm.c build/ops.o gpt2_124M.bin
-# 	mkdir -p build
-# 	$(CC) -g $(CXXFLAGS) -Illm.c $(LDFLAGS) -o $@ gpt2_webgpu.cpp build/ops.o
+	mkdir -p build && $(CXX) $(CXXFLAGS) -c -o $@ $<

 build/gpt2_webgpu: llm.c gpt2_124M.bin
 	mkdir -p build
-	$(CC) -g $(CXXFLAGS) -Illm.c $(LDFLAGS) -o $@ gpt2_webgpu.cpp ops.cpp
-
-build/test_gpt2.html: check-emsdk run.cpp term.html build/test_gpt2
-	em++ llm.c/test_gpt2.c unittest_llmc/unittest_kernels.cpp \
-	--preload-file gpt2_tokenizer.bin@/gpt2_tokenizer.bin \
-	--preload-file gpt2_124M.bin@/gpt2_124M.bin \
-	--preload-file gpt2_124M_debug_state.bin@/gpt2_124M_debug_state.bin \
-	--preload-file llm.c/dev/data/tinyshakespeare/tiny_shakespeare_train.bin@dev/data/tinyshakespeare/tiny_shakespeare_train.bin \
-	--preload-file llm.c/dev/data/tinyshakespeare/tiny_shakespeare_val.bin@dev/data/tinyshakespeare/tiny_shakespeare_val.bin \
-	-o build/test_gpt2.html \
-	$(EMFLAGS) \
-	--shell-file term.html \
+	$(CC) $(CXXFLAGS) -Illm.c $(LDFLAGS) -o $@ gpt2_webgpu.cpp ops.cpp

-build/train_gpt2.html: check-emsdk run.cpp term.html build/train_gpt2
-	em++ llm.c/train_gpt2.c unittest_llmc/unittest_kernels.cpp \
+build/gpt2_webgpu.html: check-emsdk gpt2_webgpu.cpp term.html
+	em++ gpt2_webgpu.cpp ops.cpp \
 	--preload-file gpt2_tokenizer.bin@/gpt2_tokenizer.bin \
 	--preload-file gpt2_124M.bin@/gpt2_124M.bin \
 	--preload-file gpt2_124M_debug_state.bin@/gpt2_124M_debug_state.bin \
 	--preload-file llm.c/dev/data/tinyshakespeare/tiny_shakespeare_train.bin@dev/data/tinyshakespeare/tiny_shakespeare_train.bin \
 	--preload-file llm.c/dev/data/tinyshakespeare/tiny_shakespeare_val.bin@dev/data/tinyshakespeare/tiny_shakespeare_val.bin \
-	-o build/train_gpt2.html \
+	-o build/gpt2_webgpu.html \
 	$(EMFLAGS) \
 	--shell-file term.html \

 watch-web:
-	ls *.cpp *.c *.hpp *.h | entr -s make build/gpt2_gpucpp.html
+	ls *.cpp *.c *.hpp *.h | entr -s make build/gpt2_webgpu.html

 watch-native:
 	ls *.cpp *.c *.hpp *.h | entr -s "rm -f build/gpt2_webgpu && rm -f build/ops.o && make build/gpt2_webgpu"

 run-native: build/gpt2_webgpu
 	. $(GPUCPP)/source && ./build/gpt2_webgpu

-server: build/train_gpt2.html build/test_gpt2.html build/gpt2_gpucpp.html
+# server: build/train_gpt2.html build/test_gpt2.html build/gpt2_gpucpp.html
+server: build/gpt2_webgpu.html
 	@echo "\n┌───────────────────────────────────────────────────────────────────────────────────┐"
 	@echo "│ Open http://localhost:8000/build/run.html in your browser to see the output. │"
 	@echo "│ │"

experimental/kernels/gpt2_webgpu.cpp

Lines changed: 1 addition & 5 deletions

@@ -1,9 +1,5 @@
 #include "gpu.hpp"
-#ifdef __EMSCRIPTEN__
-#include "unittest_kernels.h" // replace once we figure out how to get context to persist
-#else
 #include "ops.hpp"
-#endif
 /*
 This file trains the GPT-2 model.
 This version is the clean, minimal, reference. As such:

@@ -37,7 +33,7 @@ There will be other versions of this code that specialize it and make it fast.

 // CPU reference implementations
 #include <iostream>
-#include "gpt2_cpu.hpp"
+// #include "gpt2_cpu.hpp"

 using namespace gpu;

experimental/kernels/ops.cpp

Lines changed: 0 additions & 2 deletions

@@ -9,8 +9,6 @@

 using namespace gpu;

-#define VOCAB_SIZE 50257
-
 void encoder_forward(Context& ctx, float* out,
                      int* inp, float* wte, float* wpe,
                      int B, int T, int C){

experimental/kernels/ops.hpp

Lines changed: 2 additions & 0 deletions

@@ -9,6 +9,8 @@ using namespace gpu;
 extern "C" {
 #endif

+#define VOCAB_SIZE 50257
+
 // See https://github.com/google/dawn/blob/a8fbe981a86cb59536e2de423d2013a82d9b54a0/src/dawn/native/Limits.cpp
 #define LIMITS_BUFFER_SIZE_1GB { \
   .nextInChain = nullptr, \
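Moving VOCAB_SIZE into ops.hpp means every translation unit the Makefile now builds (ops.cpp and gpt2_webgpu.cpp, natively or under em++) sizes vocabulary-dependent buffers from the same shared constant rather than a local redefinition. A small hypothetical helper, purely for illustration and not part of the commit:

    // Hypothetical example: sizing a logits buffer from the shared VOCAB_SIZE macro.
    #include "ops.hpp"
    #include <cstddef>
    #include <vector>

    std::vector<float> make_logits_buffer(std::size_t B, std::size_t T) {
      // (B, T, VOCAB_SIZE) logits, the same size in every build that includes ops.hpp.
      return std::vector<float>(B * T * static_cast<std::size_t>(VOCAB_SIZE));
    }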
