@@ -32,6 +32,12 @@ default: run-native
32
32
run_llm.c : ./build/test_gpt2 dawnlib
33
33
$(LIBSPEC ) && $<
34
34
35
+ run_llm.c_with_metal_profiler : ./build/test_gpt2_with_metal_profiler dawnlib
36
+ $(LIBSPEC ) && $<
37
+
38
+ run_llm.c_with_time_profiler : ./build/test_gpt2_with_metal_profiler dawnlib
39
+ $(LIBSPEC ) && xcrun xctrace record --template ' Time Profiler' --launch -- ./build/test_gpt2_with_metal_profiler
40
+
35
41
run_llm.c_train : ./build/train_gpt2 dawnlib
36
42
if [ ! -d dev ] ; then ln -s $( GPUCPP) /third_party/llm.c/dev ; fi
37
43
if [ ! -f gpt2_tokenizer.bin ] ; then ln -s $( GPUCPP) /third_party/llm.c/gpt2_tokenizer.bin ; fi
@@ -48,8 +54,7 @@ gpt2_124M.bin: llm.c
48
54
ln -s ./llm.c/gpt2_tokenizer.bin ; \
49
55
fi
50
56
51
- build/test_gpt2 : llm.c build/unittest_kernels.o gpt2_124M.bin
52
- mkdir -p build
57
+ define preprocess_file
53
58
sed -i -e 's/void encoder_forward(/void ENCODER_FORWARD_CPU(/g' llm.c/train_gpt2.c
54
59
sed -i -e 's/void layernorm_forward(/void LAYERNORM_FORWARD_CPU(/g' llm.c/train_gpt2.c
55
60
sed -i -e 's/void matmul_forward(/void MATMUL_FORWARD_CPU(/g' llm.c/train_gpt2.c
@@ -67,26 +72,22 @@ build/test_gpt2: llm.c build/unittest_kernels.o gpt2_124M.bin
67
72
sed -i -e 's/void crossentropy_softmax_backward(/void CROSSENTROPY_SOFTMAX_BACKWARD_CPU(/g' llm.c/train_gpt2.c
68
73
grep -q "^# include \"unittest_kernels.h\"" llm.c/train_gpt2.c || \
69
74
printf '1i\n#include "unittest_kernels.h"\n.\nw\nq\n' | ed -s llm.c/train_gpt2.c
75
+ endef
76
+
77
+ build/test_gpt2 : llm.c build/unittest_kernels.o gpt2_124M.bin
78
+ mkdir -p build
79
+ $(call preprocess_file)
70
80
$(CC ) $(CFLAGS ) $(LDFLAGS ) -o $@ llm.c/test_gpt2.c build/unittest_kernels.o
71
81
82
+ build/test_gpt2_with_metal_profiler : llm.c build/unittest_kernels.o gpt2_124M.bin
83
+ mkdir -p build
84
+ $(call preprocess_file)
85
+ $(CC ) $(CFLAGS ) $(LDFLAGS ) -o $@ llm.c/test_gpt2.c build/unittest_kernels.o -g
86
+ install_name_tool -change @rpath/libdawn.dylib $(GPUCPP ) /third_party/lib/libdawn.dylib $@
87
+
72
88
build/train_gpt2 : llm.c build/unittest_kernels.o gpt2_124M.bin
73
89
mkdir -p build
74
- sed -i -e ' s/void encoder_forward(/void ENCODER_FORWARD_CPU(/g' llm.c/train_gpt2.c
75
- sed -i -e ' s/void layernorm_forward(/void LAYERNORM_FORWARD_CPU(/g' llm.c/train_gpt2.c
76
- sed -i -e ' s/void matmul_forward(/void MATMUL_FORWARD_CPU(/g' llm.c/train_gpt2.c
77
- sed -i -e ' s/void attention_forward(/void ATTENTION_FORWARD_CPU(/g' llm.c/train_gpt2.c
78
- sed -i -e ' s/void gelu_forward(/void GELU_FORWARD_CPU(/g' llm.c/train_gpt2.c
79
- sed -i -e ' s/void residual_forward(/void RESIDUAL_FORWARD_CPU(/g' llm.c/train_gpt2.c
80
- sed -i -e ' s/void softmax_forward(/void SOFTMAX_FORWARD_CPU(/g' llm.c/train_gpt2.c
81
- sed -i -e ' s/void crossentropy_forward(/void CROSSENTROPY_FORWARD_CPU(/g' llm.c/train_gpt2.c
82
- sed -i -e ' s/void encoder_backward(/void ENCODER_BACKWARD_CPU(/g' llm.c/train_gpt2.c
83
- sed -i -e ' s/void layernorm_backward(/void LAYERNORM_BACKWARD_CPU(/g' llm.c/train_gpt2.c
84
- sed -i -e ' s/void matmul_backward(/void MATMUL_BACKWARD_CPU(/g' llm.c/train_gpt2.c
85
- sed -i -e ' s/void attention_backward(/void ATTENTION_BACKWARD_CPU(/g' llm.c/train_gpt2.c
86
- sed -i -e ' s/void gelu_backward(/void GELU_BACKWARD_CPU(/g' llm.c/train_gpt2.c
87
- sed -i -e ' s/void residual_backward(/void RESIDUAL_BACKWARD_CPU(/g' llm.c/train_gpt2.c
88
- sed -i -e ' s/void crossentropy_softmax_backward(/void CROSSENTROPY_SOFTMAX_BACKWARD_CPU(/g' llm.c/train_gpt2.c
89
- grep -q " ^#include \" unittest_kernels.h\" " llm.c/train_gpt2.c || sed -i ' 1i#include \"unittest_kernels.h\"' llm.c/train_gpt2.c
90
+ $(call preprocess_file)
90
91
$(CC ) $(CFLAGS ) $(LDFLAGS ) -o $@ llm.c/train_gpt2.c build/unittest_kernels.o
91
92
92
93
build/ops.o : ops.cpp ops.hpp kernels.h llm.c
0 commit comments