@@ -377,9 +377,8 @@ void gpt2_build_from_checkpoint(Context& ctx, GPT2 *model, const char* checkpoin
377377 model->batch_size = 0 ;
378378 model->seq_len = 0 ;
379379 model->mean_loss = -1 .0f ; // -1.0f will designate no loss
380- // Allocate B * C buffer for mean loss
381- model->mean_loss_buffer = (float *)mallocCheck (sizeof (float ) * model->batch_size * model->seq_len );
382- model->probs_buffer = (float *)mallocCheck (sizeof (float ) * model->batch_size * model->seq_len * Vp);
380+ model->mean_loss_buffer = NULL ;
381+ model->probs_buffer = NULL ;
383382 model->backward_enabled = false ;
384383
385384 printf (" Model build complete\n " );
@@ -418,6 +417,8 @@ void gpt2_forward(Context& ctx, GPT2 *model, Tensor& inputs, Tensor& targets, si
418417 model->seq_len = T;
419418 // and now allocate the space
420419 fill_in_activation_sizes (model->act_sizes , model->config , B, T);
420+ model->mean_loss_buffer = (float *)mallocCheck (sizeof (float ) * model->batch_size * model->seq_len );
421+ model->probs_buffer = (float *)mallocCheck (sizeof (float ) * model->batch_size * model->seq_len * Vp);
421422
422423 // TODO(avh): this is just a resource test for now, eventually deprecate CPU allocations
423424 size_t num_activations = 0 ;
@@ -635,7 +636,6 @@ void gpt2_forward(Context& ctx, GPT2 *model, Tensor& inputs, Tensor& targets, si
635636 }
636637 // for convenience also evaluate the mean loss
637638 float mean_loss = 0 .0f ;
638- // toCPU(ctx, model->acts_.data[22], model->acts.losses.data, model->act_sizes[22] * sizeof(float));
639639 toCPU (ctx, model->acts .losses , model->mean_loss_buffer , B*T * sizeof (float ));
640640 for (int i=0 ; i<B*T; i++) { mean_loss += model->mean_loss_buffer [i]; }
641641 mean_loss /= B*T;
0 commit comments