@@ -377,9 +377,8 @@ void gpt2_build_from_checkpoint(Context& ctx, GPT2 *model, const char* checkpoin
377
377
model->batch_size = 0 ;
378
378
model->seq_len = 0 ;
379
379
model->mean_loss = -1 .0f ; // -1.0f will designate no loss
380
- // Allocate B * C buffer for mean loss
381
- model->mean_loss_buffer = (float *)mallocCheck (sizeof (float ) * model->batch_size * model->seq_len );
382
- model->probs_buffer = (float *)mallocCheck (sizeof (float ) * model->batch_size * model->seq_len * Vp);
380
+ model->mean_loss_buffer = NULL ;
381
+ model->probs_buffer = NULL ;
383
382
model->backward_enabled = false ;
384
383
385
384
printf (" Model build complete\n " );
@@ -418,6 +417,8 @@ void gpt2_forward(Context& ctx, GPT2 *model, Tensor& inputs, Tensor& targets, si
418
417
model->seq_len = T;
419
418
// and now allocate the space
420
419
fill_in_activation_sizes (model->act_sizes , model->config , B, T);
420
+ model->mean_loss_buffer = (float *)mallocCheck (sizeof (float ) * model->batch_size * model->seq_len );
421
+ model->probs_buffer = (float *)mallocCheck (sizeof (float ) * model->batch_size * model->seq_len * Vp);
421
422
422
423
// TODO(avh): this is just a resource test for now, eventually deprecate CPU allocations
423
424
size_t num_activations = 0 ;
@@ -635,7 +636,6 @@ void gpt2_forward(Context& ctx, GPT2 *model, Tensor& inputs, Tensor& targets, si
635
636
}
636
637
// for convenience also evaluate the mean loss
637
638
float mean_loss = 0 .0f ;
638
- // toCPU(ctx, model->acts_.data[22], model->acts.losses.data, model->act_sizes[22] * sizeof(float));
639
639
toCPU (ctx, model->acts .losses , model->mean_loss_buffer , B*T * sizeof (float ));
640
640
for (int i=0 ; i<B*T; i++) { mean_loss += model->mean_loss_buffer [i]; }
641
641
mean_loss /= B*T;
0 commit comments