Skip to content

Commit 6be7e1e

Browse files
Fix a memory-allocation bug: defer allocating mean_loss_buffer and probs_buffer from gpt2_build_from_checkpoint (where batch_size and seq_len were still 0, producing zero-sized allocations) to gpt2_forward, after B and T are known
1 parent 7ef40b0 commit 6be7e1e

File tree

2 files changed

+6
-6
lines changed

2 files changed

+6
-6
lines changed

experimental/kernels/Makefile

+2-2
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ CXXFLAGS=-std=c++17 -I$(GPUCPP) -I$(GPUCPP)/third_party/headers -I. -Iunittest_l
1616
CFLAGS=-Ofast -march=native -I. -Iunittest_llmc
1717
# CFLAGS=-O2 -march=native -I. -Iunittest_llmc
1818

19-
LDFLAGS=$(STDLIB) -L$(GPUCPP)/third_party/lib -ldl -ldawn
19+
LDFLAGS=$(STDLIB) -L$(GPUCPP)/third_party/lib -ldl -ldawn -fsanitize=address
2020
FLAGS=$(CXXFLAGS) $(LDFLAGS)
2121

2222
ifeq ($(shell [ -d /opt/homebrew/opt/libomp/lib ] && echo "exists"), exists)
@@ -101,7 +101,7 @@ build/gpt2_webgpu: llm.c gpt2_124M.bin llm.c gpt2_webgpu.cpp ops.cpp
101101

102102
build/gpt2_webgpu_aot: llm.c gpt2_124M.bin llm.c gpt2_webgpu_aot.cpp ops_aot.cpp
103103
mkdir -p build
104-
$(CC) $(CXXFLAGS) -Illm.c $(LDFLAGS) -o $@ gpt2_webgpu_aot.cpp ops_aot.cpp
104+
$(CC) $(CXXFLAGS) -Illm.c $(LDFLAGS) -o $@ gpt2_webgpu_aot.cpp ops_aot.cpp -g
105105

106106
build/gpt2_webgpu.html: check-emsdk gpt2_webgpu.cpp term.html llm.c
107107
em++ gpt2_webgpu.cpp ops.cpp \

experimental/kernels/gpt2_webgpu_aot.cpp

+4-4
Original file line numberDiff line numberDiff line change
@@ -377,9 +377,8 @@ void gpt2_build_from_checkpoint(Context& ctx, GPT2 *model, const char* checkpoin
377377
model->batch_size = 0;
378378
model->seq_len = 0;
379379
model->mean_loss = -1.0f; // -1.0f will designate no loss
380-
// Allocate B * C buffer for mean loss
381-
model->mean_loss_buffer = (float*)mallocCheck(sizeof(float) * model->batch_size * model->seq_len);
382-
model->probs_buffer = (float*)mallocCheck(sizeof(float) * model->batch_size * model->seq_len * Vp);
380+
model->mean_loss_buffer = NULL;
381+
model->probs_buffer = NULL;
383382
model->backward_enabled = false;
384383

385384
printf("Model build complete\n");
@@ -418,6 +417,8 @@ void gpt2_forward(Context& ctx, GPT2 *model, Tensor& inputs, Tensor& targets, si
418417
model->seq_len = T;
419418
// and now allocate the space
420419
fill_in_activation_sizes(model->act_sizes, model->config, B, T);
420+
model->mean_loss_buffer = (float*)mallocCheck(sizeof(float) * model->batch_size * model->seq_len);
421+
model->probs_buffer = (float*)mallocCheck(sizeof(float) * model->batch_size * model->seq_len * Vp);
421422

422423
// TODO(avh): this is just a resource test for now, eventually deprecate CPU allocations
423424
size_t num_activations = 0;
@@ -635,7 +636,6 @@ void gpt2_forward(Context& ctx, GPT2 *model, Tensor& inputs, Tensor& targets, si
635636
}
636637
// for convenience also evaluate the mean loss
637638
float mean_loss = 0.0f;
638-
//toCPU(ctx, model->acts_.data[22], model->acts.losses.data, model->act_sizes[22] * sizeof(float));
639639
toCPU(ctx, model->acts.losses, model->mean_loss_buffer, B*T * sizeof(float));
640640
for (int i=0; i<B*T; i++) { mean_loss += model->mean_loss_buffer[i]; }
641641
mean_loss /= B*T;

0 commit comments

Comments
 (0)