Commit a0859ca

update web targets to match native targets in passing context into ops
1 parent e58f612 commit a0859ca

File tree

4 files changed (+13, -30 lines):
  experimental/kernels/Makefile
  experimental/kernels/gpt2_webgpu.cpp
  experimental/kernels/ops.cpp
  experimental/kernels/ops.hpp

experimental/kernels/Makefile

Lines changed: 10 additions & 23 deletions

@@ -15,7 +15,8 @@ endif
 # EMFLAGS=-std=c++17 -I$(GPUCPP) -I$(GPUCPP)/third_party/headers/wasm -I. -Iunittest_llmc -Illm.c -s USE_WEBGPU=1 -s -s STACK_SIZE=100000 -s MEMORY64=1 -s ALLOW_MEMORY_GROWTH=1
 EMFLAGS=-std=c++17 -I$(GPUCPP) -I$(GPUCPP)/third_party/headers/wasm -I. -Iunittest_llmc -Illm.c -s USE_WEBGPU=1 -s ASYNCIFY=1 -s STACK_SIZE=100000 -s MEMORY64=1 -s ALLOW_MEMORY_GROWTH=1
 CXXFLAGS=-std=c++17 -I$(GPUCPP) -I$(GPUCPP)/third_party/headers -I. -Iunittest_llmc
-CFLAGS=-Ofast -march=native -I. -Iunittest_llmc
+# CFLAGS=-Ofast -march=native -I. -Iunittest_llmc
+CFLAGS=-O2 -march=native -I. -Iunittest_llmc

 LDFLAGS=$(STDLIB) -L$(GPUCPP)/third_party/lib -ldl -ldawn
 FLAGS=$(CXXFLAGS) $(LDFLAGS)

@@ -91,48 +92,34 @@ build/train_gpt2: llm.c build/unittest_kernels.o gpt2_124M.bin
 	$(CC) $(CFLAGS) $(LDFLAGS) -o $@ llm.c/train_gpt2.c build/unittest_kernels.o

 build/ops.o: ops.cpp ops.hpp kernels.h
-	mkdir -p build && $(CXX) $(CXXFLAGS) -g -c -o $@ $<
-
-# build/gpt2_webgpu: llm.c build/ops.o gpt2_124M.bin
-# 	mkdir -p build
-# 	$(CC) -g $(CXXFLAGS) -Illm.c $(LDFLAGS) -o $@ gpt2_webgpu.cpp build/ops.o
+	mkdir -p build && $(CXX) $(CXXFLAGS) -c -o $@ $<

 build/gpt2_webgpu: llm.c gpt2_124M.bin
 	mkdir -p build
-	$(CC) -g $(CXXFLAGS) -Illm.c $(LDFLAGS) -o $@ gpt2_webgpu.cpp ops.cpp
-
-build/test_gpt2.html: check-emsdk run.cpp term.html build/test_gpt2
-	em++ llm.c/test_gpt2.c unittest_llmc/unittest_kernels.cpp \
-	--preload-file gpt2_tokenizer.bin@/gpt2_tokenizer.bin \
-	--preload-file gpt2_124M.bin@/gpt2_124M.bin \
-	--preload-file gpt2_124M_debug_state.bin@/gpt2_124M_debug_state.bin \
-	--preload-file llm.c/dev/data/tinyshakespeare/tiny_shakespeare_train.bin@dev/data/tinyshakespeare/tiny_shakespeare_train.bin \
-	--preload-file llm.c/dev/data/tinyshakespeare/tiny_shakespeare_val.bin@dev/data/tinyshakespeare/tiny_shakespeare_val.bin \
-	-o build/test_gpt2.html \
-	$(EMFLAGS) \
-	--shell-file term.html \
+	$(CC) $(CXXFLAGS) -Illm.c $(LDFLAGS) -o $@ gpt2_webgpu.cpp ops.cpp

-build/train_gpt2.html: check-emsdk run.cpp term.html build/train_gpt2
-	em++ llm.c/train_gpt2.c unittest_llmc/unittest_kernels.cpp \
+build/gpt2_webgpu.html: check-emsdk gpt2_webgpu.cpp term.html
+	em++ gpt2_webgpu.cpp ops.cpp \
 	--preload-file gpt2_tokenizer.bin@/gpt2_tokenizer.bin \
 	--preload-file gpt2_124M.bin@/gpt2_124M.bin \
 	--preload-file gpt2_124M_debug_state.bin@/gpt2_124M_debug_state.bin \
 	--preload-file llm.c/dev/data/tinyshakespeare/tiny_shakespeare_train.bin@dev/data/tinyshakespeare/tiny_shakespeare_train.bin \
 	--preload-file llm.c/dev/data/tinyshakespeare/tiny_shakespeare_val.bin@dev/data/tinyshakespeare/tiny_shakespeare_val.bin \
-	-o build/train_gpt2.html \
+	-o build/gpt2_webgpu.html \
 	$(EMFLAGS) \
 	--shell-file term.html \

 watch-web:
-	ls *.cpp *.c *.hpp *.h | entr -s make build/gpt2_gpucpp.html
+	ls *.cpp *.c *.hpp *.h | entr -s make build/gpt2_webgpu.html

 watch-native:
 	ls *.cpp *.c *.hpp *.h | entr -s "rm -f build/gpt2_webgpu && rm -f build/ops.o && make build/gpt2_webgpu"

 run-native: build/gpt2_webgpu
 	. $(GPUCPP)/source && ./build/gpt2_webgpu

-server: build/train_gpt2.html build/test_gpt2.html build/gpt2_gpucpp.html
+# server: build/train_gpt2.html build/test_gpt2.html build/gpt2_gpucpp.html
+server: build/gpt2_webgpu.html
 	@echo "\n┌───────────────────────────────────────────────────────────────────────────────────┐"
 	@echo "│ Open http://localhost:8000/build/run.html in your browser to see the output. │"
 	@echo "│ │"

experimental/kernels/gpt2_webgpu.cpp

Lines changed: 1 addition & 5 deletions

@@ -1,9 +1,5 @@
 #include "gpu.hpp"
-#ifdef __EMSCRIPTEN__
-#include "unittest_kernels.h" // replace once we figure out how to get context to persist
-#else
 #include "ops.hpp"
-#endif
 /*
 This file trains the GPT-2 model.
 This version is the clean, minimal, reference. As such:

@@ -37,7 +33,7 @@ There will be other versions of this code that specialize it and make it fast.

 // CPU reference implementations
 #include <iostream>
-#include "gpt2_cpu.hpp"
+// #include "gpt2_cpu.hpp"

 using namespace gpu;

experimental/kernels/ops.cpp

Lines changed: 0 additions & 2 deletions

@@ -9,8 +9,6 @@

 using namespace gpu;

-#define VOCAB_SIZE 50257
-
 void encoder_forward(Context& ctx, float* out,
                      int* inp, float* wte, float* wpe,
                      int B, int T, int C){

experimental/kernels/ops.hpp

Lines changed: 2 additions & 0 deletions

@@ -9,6 +9,8 @@ using namespace gpu;
 extern "C" {
 #endif

+#define VOCAB_SIZE 50257
+
 // See https://github.com/google/dawn/blob/a8fbe981a86cb59536e2de423d2013a82d9b54a0/src/dawn/native/Limits.cpp
 #define LIMITS_BUFFER_SIZE_1GB { \
   .nextInChain = nullptr, \
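Moving VOCAB_SIZE into ops.hpp means every translation unit the Makefile now builds (ops.cpp and gpt2_webgpu.cpp, natively or under em++) sizes vocabulary-dependent buffers from the same shared constant rather than a local redefinition. A small hypothetical helper, purely for illustration and not part of the commit:

    // Hypothetical example: sizing a logits buffer from the shared VOCAB_SIZE macro.
    #include "ops.hpp"
    #include <cstddef>
    #include <vector>

    std::vector<float> make_logits_buffer(std::size_t B, std::size_t T) {
      // (B, T, VOCAB_SIZE) logits, the same size in every build that includes ops.hpp.
      return std::vector<float>(B * T * static_cast<std::size_t>(VOCAB_SIZE));
    }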
