@@ -32,6 +32,12 @@ default: run-native
32
32
# Run the GPT-2 test binary.
# $< is the first prerequisite (./build/test_gpt2). LIBSPEC is defined
# elsewhere in this Makefile; presumably it sets up the shared-library search
# path for Dawn -- confirm against its definition.
# The variable reference must not contain whitespace: `$(LIBSPEC )` names a
# different (undefined) variable and expands to nothing.
.PHONY: run_llm.c
run_llm.c: ./build/test_gpt2 dawnlib
	$(LIBSPEC) && $<
34
34
35
# Run the profiler-enabled GPT-2 test binary with METAL_CAPTURE_ENABLED=1
# exported into its environment.
# $< is the first prerequisite (./build/test_gpt2_with_metal_profiler).
# LIBSPEC is defined elsewhere in this Makefile; it must be referenced without
# embedded whitespace or it expands to nothing.
.PHONY: run_llm.c_with_metal_profiler
run_llm.c_with_metal_profiler: ./build/test_gpt2_with_metal_profiler dawnlib
	$(LIBSPEC) && export METAL_CAPTURE_ENABLED=1 && $<
37
+
38
# Record an Instruments "Time Profiler" trace of the profiler-enabled GPT-2
# test binary via `xcrun xctrace record`.
# $< is the first prerequisite (./build/test_gpt2_with_metal_profiler), which
# is the build that is compiled with -g.
# The template name must be exactly 'Time Profiler' (no leading space), and
# LIBSPEC must be referenced without embedded whitespace.
.PHONY: run_llm.c_with_time_profiler
run_llm.c_with_time_profiler: ./build/test_gpt2_with_metal_profiler dawnlib
	$(LIBSPEC) && xcrun xctrace record --template 'Time Profiler' --launch -- $<
40
+
35
41
run_llm.c_train : ./build/train_gpt2 dawnlib
36
42
if [ ! -d dev ] ; then ln -s $( GPUCPP) /third_party/llm.c/dev ; fi
37
43
if [ ! -f gpt2_tokenizer.bin ] ; then ln -s $( GPUCPP) /third_party/llm.c/gpt2_tokenizer.bin ; fi
@@ -48,8 +54,9 @@ gpt2_124M.bin: llm.c
48
54
ln -s ./llm.c/gpt2_tokenizer.bin ; \
49
55
fi
50
56
51
- build/test_gpt2 : llm.c build/unittest_kernels.o gpt2_124M.bin
52
- mkdir -p build
57
+ define preprocess_file
58
+ sed -i -e 's/int main(/int MAIN(/g' llm.c/test_gpt2.c
59
+ sed -i -e 's/int main(/int MAIN(/g' llm.c/train_gpt2.c
53
60
sed -i -e 's/void encoder_forward(/void ENCODER_FORWARD_CPU(/g' llm.c/train_gpt2.c
54
61
sed -i -e 's/void layernorm_forward(/void LAYERNORM_FORWARD_CPU(/g' llm.c/train_gpt2.c
55
62
sed -i -e 's/void matmul_forward(/void MATMUL_FORWARD_CPU(/g' llm.c/train_gpt2.c
@@ -67,26 +74,22 @@ build/test_gpt2: llm.c build/unittest_kernels.o gpt2_124M.bin
67
74
sed -i -e 's/void crossentropy_softmax_backward(/void CROSSENTROPY_SOFTMAX_BACKWARD_CPU(/g' llm.c/train_gpt2.c
68
75
grep -q "^# include \"unittest_kernels.h\"" llm.c/train_gpt2.c || \
69
76
printf '1i\n#include "unittest_kernels.h"\n.\nw\nq\n' | ed -s llm.c/train_gpt2.c
77
+ endef
78
+
79
# Build the GPT-2 test binary.
# Steps: create the output directory, run the shared preprocess_file recipe
# (in-place edits of the llm.c sources), then compile llm.c/test_gpt2.c and
# link it against build/unittest_kernels.o.
# Variable references are written without embedded whitespace; `$(CC )` would
# look up an undefined variable and expand to nothing.
build/test_gpt2: llm.c build/unittest_kernels.o gpt2_124M.bin
	mkdir -p $(@D)
	$(call preprocess_file)
	$(CC) $(CFLAGS) $(LDFLAGS) -o $@ llm.c/test_gpt2.c build/unittest_kernels.o
71
83
84
# Build the GPT-2 test binary with the Metal profiler compiled in.
# Same preprocessing and C sources as build/test_gpt2, plus:
#   - $(GPUCPP)/experimental/profiler/metal.mm and -I$(GPUCPP)
#   - the Metal and Foundation frameworks (framework names are spelled with
#     their real capitalisation so linking also works on case-sensitive volumes)
#   - -DMETAL_PROFILER and -g (debug info)
# After linking, install_name_tool rewrites the binary's @rpath/libdawn.dylib
# reference to the copy under $(GPUCPP)/third_party/lib.
# Paths are joined directly onto $(GPUCPP); a space after the reference would
# split each path into two separate arguments.
build/test_gpt2_with_metal_profiler: llm.c build/unittest_kernels.o gpt2_124M.bin
	mkdir -p $(@D)
	$(call preprocess_file)
	$(CC) $(CFLAGS) $(LDFLAGS) -o $@ llm.c/test_gpt2.c build/unittest_kernels.o \
		-I$(GPUCPP) $(GPUCPP)/experimental/profiler/metal.mm \
		-framework Metal -framework Foundation -DMETAL_PROFILER -g
	install_name_tool -change @rpath/libdawn.dylib $(GPUCPP)/third_party/lib/libdawn.dylib $@
89
+
72
90
# Build the GPT-2 training binary.
# The per-function sed renames that used to be listed inline here are now
# carried by the shared preprocess_file recipe, which is invoked before
# compiling llm.c/train_gpt2.c and linking it against build/unittest_kernels.o.
# Variable references are written without embedded whitespace; `$(CC )` would
# look up an undefined variable and expand to nothing.
build/train_gpt2: llm.c build/unittest_kernels.o gpt2_124M.bin
	mkdir -p $(@D)
	$(call preprocess_file)
	$(CC) $(CFLAGS) $(LDFLAGS) -o $@ llm.c/train_gpt2.c build/unittest_kernels.o
91
94
92
95
build/ops.o : ops.cpp ops.hpp kernels.h llm.c
0 commit comments