Commit f629a33

Remove NUM_PARAMETER_LAYERS
1 parent 6be7e1e

1 file changed: experimental/kernels/gpt2_webgpu_aot.cpp (+67 -46 lines)

@@ -47,7 +47,6 @@ typedef struct {
 
 // the parameters of the model
 #define NUM_PARAMETER_TENSORS 16
-#define NUM_PARAMETER_LAYERS 12
 typedef struct {
     Tensor wte; // (V, C)
     Tensor wpe; // (maxT, C)
@@ -91,22 +90,36 @@ void fill_in_parameter_sizes(size_t* param_sizes, GPT2Config config) {
 }
 
 // allocate memory for the parameters and point the individual tensors to the right places
-void malloc_and_point_parameters(Context& ctx, ParameterTensors* params, size_t* param_sizes) {
+void malloc_and_point_parameters(Context& ctx, GPT2Config config, ParameterTensors* params, size_t* param_sizes) {
+    size_t L = config.num_layers;
     params->wte = createTensor(ctx, Shape{param_sizes[0]}, kf32);
     params->wpe = createTensor(ctx, Shape{param_sizes[1]}, kf32);
-    for(int l = 0; l < NUM_PARAMETER_LAYERS; l++) {
-        params->ln1w.push_back(createTensor(ctx, Shape{param_sizes[2]/NUM_PARAMETER_LAYERS}, kf32));
-        params->ln1b.push_back(createTensor(ctx, Shape{param_sizes[3]/NUM_PARAMETER_LAYERS}, kf32));
-        params->qkvw.push_back(createTensor(ctx, Shape{param_sizes[4]/NUM_PARAMETER_LAYERS}, kf32));
-        params->qkvb.push_back(createTensor(ctx, Shape{param_sizes[5]/NUM_PARAMETER_LAYERS}, kf32));
-        params->attprojw.push_back(createTensor(ctx, Shape{param_sizes[6]/NUM_PARAMETER_LAYERS}, kf32));
-        params->attprojb.push_back(createTensor(ctx, Shape{param_sizes[7]/NUM_PARAMETER_LAYERS}, kf32));
-        params->ln2w.push_back(createTensor(ctx, Shape{param_sizes[8]/NUM_PARAMETER_LAYERS}, kf32));
-        params->ln2b.push_back(createTensor(ctx, Shape{param_sizes[9]/NUM_PARAMETER_LAYERS}, kf32));
-        params->fcw.push_back(createTensor(ctx, Shape{param_sizes[10]/NUM_PARAMETER_LAYERS}, kf32));
-        params->fcb.push_back(createTensor(ctx, Shape{param_sizes[11]/NUM_PARAMETER_LAYERS}, kf32));
-        params->fcprojw.push_back(createTensor(ctx, Shape{param_sizes[12]/NUM_PARAMETER_LAYERS}, kf32));
-        params->fcprojb.push_back(createTensor(ctx, Shape{param_sizes[13]/NUM_PARAMETER_LAYERS}, kf32));
+
+    params->ln1w.resize(L);
+    params->ln1b.resize(L);
+    params->qkvw.resize(L);
+    params->qkvb.resize(L);
+    params->attprojw.resize(L);
+    params->attprojb.resize(L);
+    params->ln2w.resize(L);
+    params->ln2b.resize(L);
+    params->fcw.resize(L);
+    params->fcb.resize(L);
+    params->fcprojw.resize(L);
+    params->fcprojb.resize(L);
+    for(int l = 0; l < L; l++) {
+        params->ln1w[l] = createTensor(ctx, Shape{param_sizes[2]/config.num_layers}, kf32);
+        params->ln1b[l] = createTensor(ctx, Shape{param_sizes[3]/config.num_layers}, kf32);
+        params->qkvw[l] = createTensor(ctx, Shape{param_sizes[4]/config.num_layers}, kf32);
+        params->qkvb[l] = createTensor(ctx, Shape{param_sizes[5]/config.num_layers}, kf32);
+        params->attprojw[l] = createTensor(ctx, Shape{param_sizes[6]/config.num_layers}, kf32);
+        params->attprojb[l] = createTensor(ctx, Shape{param_sizes[7]/config.num_layers}, kf32);
+        params->ln2w[l] = createTensor(ctx, Shape{param_sizes[8]/config.num_layers}, kf32);
+        params->ln2b[l] = createTensor(ctx, Shape{param_sizes[9]/config.num_layers}, kf32);
+        params->fcw[l] = createTensor(ctx, Shape{param_sizes[10]/config.num_layers}, kf32);
+        params->fcb[l] = createTensor(ctx, Shape{param_sizes[11]/config.num_layers}, kf32);
+        params->fcprojw[l] = createTensor(ctx, Shape{param_sizes[12]/config.num_layers}, kf32);
+        params->fcprojb[l] = createTensor(ctx, Shape{param_sizes[13]/config.num_layers}, kf32);
     }
     params->lnfw = createTensor(ctx, Shape{param_sizes[14]}, kf32);
     params->lnfb = createTensor(ctx, Shape{param_sizes[15]}, kf32);
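
The pattern this hunk adopts — sizing per-layer vectors from the runtime config instead of a compile-time macro, then index-assigning rather than push_back-ing — can be distilled as below. This is a minimal standalone sketch, not the file's real API: GPT2Config is reduced to one field, and the hypothetical makeTensor stands in for createTensor(ctx, Shape{...}, kf32).

#include <cstddef>
#include <vector>

// Simplified stand-ins for the real types (not the gpu.cpp API).
struct GPT2Config { size_t num_layers; };
struct Tensor { size_t n; };

// Hypothetical helper mirroring createTensor(ctx, Shape{n}, kf32).
static Tensor makeTensor(size_t n) { return Tensor{n}; }

struct ParameterTensors {
    std::vector<Tensor> ln1w; // one entry per transformer layer
};

// The commit's pattern: take the layer count from the config at runtime,
// resize each per-layer vector once, then index-assign inside the loop
// (instead of push_back bounded by a fixed NUM_PARAMETER_LAYERS).
void point_parameters(const GPT2Config& config, ParameterTensors* params,
                      const size_t* param_sizes) {
    size_t L = config.num_layers;
    params->ln1w.resize(L);
    for (size_t l = 0; l < L; l++) {
        // each layer's slice is the flattened total divided by the layer count
        params->ln1w[l] = makeTensor(param_sizes[2] / L);
    }
}

int main() {
    GPT2Config cfg{12};                       // GPT-2 small: 12 layers
    ParameterTensors params;
    size_t param_sizes[3] = {0, 0, 12 * 768}; // illustrative: ln1w holds L * C floats
    point_parameters(cfg, &params, param_sizes);
    return (params.ln1w.size() == 12 && params.ln1w[0].n == 768) ? 0 : 1;
}

The activations hunk below applies the identical treatment to malloc_and_point_activations.
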
@@ -201,25 +214,42 @@ void fill_in_activation_sizes(size_t* act_sizes, GPT2Config config, int B, int T
     act_sizes[22] = B * T; // losses
 }
 
-void malloc_and_point_activations(Context& ctx, ActivationTensors* acts, size_t* act_sizes) {
+void malloc_and_point_activations(Context& ctx, GPT2Config config, ActivationTensors* acts, size_t* act_sizes) {
+    size_t L = config.num_layers;
     acts->encoded = createTensor(ctx, Shape{act_sizes[0]}, kf32);
-    for (int l = 0; l < NUM_PARAMETER_LAYERS; l++) {
-        acts->ln1.push_back(createTensor(ctx, Shape{act_sizes[1]/NUM_PARAMETER_LAYERS}, kf32));
-        acts->ln1_mean.push_back(createTensor(ctx, Shape{act_sizes[2]/NUM_PARAMETER_LAYERS}, kf32));
-        acts->ln1_rstd.push_back(createTensor(ctx, Shape{act_sizes[3]/NUM_PARAMETER_LAYERS}, kf32));
-        acts->qkv.push_back(createTensor(ctx, Shape{act_sizes[4]/NUM_PARAMETER_LAYERS}, kf32));
-        acts->atty.push_back(createTensor(ctx, Shape{act_sizes[5]/NUM_PARAMETER_LAYERS}, kf32));
-        acts->preatt.push_back(createTensor(ctx, Shape{act_sizes[6]/NUM_PARAMETER_LAYERS}, kf32));
-        acts->att.push_back(createTensor(ctx, Shape{act_sizes[7]/NUM_PARAMETER_LAYERS}, kf32));
-        acts->attproj.push_back(createTensor(ctx, Shape{act_sizes[8]/NUM_PARAMETER_LAYERS}, kf32));
-        acts->residual2.push_back(createTensor(ctx, Shape{act_sizes[9]/NUM_PARAMETER_LAYERS}, kf32));
-        acts->ln2.push_back(createTensor(ctx, Shape{act_sizes[10]/NUM_PARAMETER_LAYERS}, kf32));
-        acts->ln2_mean.push_back(createTensor(ctx, Shape{act_sizes[11]/NUM_PARAMETER_LAYERS}, kf32));
-        acts->ln2_rstd.push_back(createTensor(ctx, Shape{act_sizes[12]/NUM_PARAMETER_LAYERS}, kf32));
-        acts->fch.push_back(createTensor(ctx, Shape{act_sizes[13]/NUM_PARAMETER_LAYERS}, kf32));
-        acts->fch_gelu.push_back(createTensor(ctx, Shape{act_sizes[14]/NUM_PARAMETER_LAYERS}, kf32));
-        acts->fcproj.push_back(createTensor(ctx, Shape{act_sizes[15]/NUM_PARAMETER_LAYERS}, kf32));
-        acts->residual3.push_back(createTensor(ctx, Shape{act_sizes[16]/NUM_PARAMETER_LAYERS}, kf32));
+    acts->ln1.resize(L);
+    acts->ln1_mean.resize(L);
+    acts->ln1_rstd.resize(L);
+    acts->qkv.resize(L);
+    acts->atty.resize(L);
+    acts->preatt.resize(L);
+    acts->att.resize(L);
+    acts->attproj.resize(L);
+    acts->residual2.resize(L);
+    acts->ln2.resize(L);
+    acts->ln2_mean.resize(L);
+    acts->ln2_rstd.resize(L);
+    acts->fch.resize(L);
+    acts->fch_gelu.resize(L);
+    acts->fcproj.resize(L);
+    acts->residual3.resize(L);
+    for (int l = 0; l < L; l++) {
+        acts->ln1[l] = createTensor(ctx, Shape{act_sizes[1]/config.num_layers}, kf32);
+        acts->ln1_mean[l] = createTensor(ctx, Shape{act_sizes[2]/config.num_layers}, kf32);
+        acts->ln1_rstd[l] = createTensor(ctx, Shape{act_sizes[3]/config.num_layers}, kf32);
+        acts->qkv[l] = createTensor(ctx, Shape{act_sizes[4]/config.num_layers}, kf32);
+        acts->atty[l] = createTensor(ctx, Shape{act_sizes[5]/config.num_layers}, kf32);
+        acts->preatt[l] = createTensor(ctx, Shape{act_sizes[6]/config.num_layers}, kf32);
+        acts->att[l] = createTensor(ctx, Shape{act_sizes[7]/config.num_layers}, kf32);
+        acts->attproj[l] = createTensor(ctx, Shape{act_sizes[8]/config.num_layers}, kf32);
+        acts->residual2[l] = createTensor(ctx, Shape{act_sizes[9]/config.num_layers}, kf32);
+        acts->ln2[l] = createTensor(ctx, Shape{act_sizes[10]/config.num_layers}, kf32);
+        acts->ln2_mean[l] = createTensor(ctx, Shape{act_sizes[11]/config.num_layers}, kf32);
+        acts->ln2_rstd[l] = createTensor(ctx, Shape{act_sizes[12]/config.num_layers}, kf32);
+        acts->fch[l] = createTensor(ctx, Shape{act_sizes[13]/config.num_layers}, kf32);
+        acts->fch_gelu[l] = createTensor(ctx, Shape{act_sizes[14]/config.num_layers}, kf32);
+        acts->fcproj[l] = createTensor(ctx, Shape{act_sizes[15]/config.num_layers}, kf32);
+        acts->residual3[l] = createTensor(ctx, Shape{act_sizes[16]/config.num_layers}, kf32);
     }
     acts->lnf = createTensor(ctx, Shape{act_sizes[17]}, kf32);
     acts->lnf_mean = createTensor(ctx, Shape{act_sizes[18]}, kf32);
@@ -229,15 +259,6 @@ void malloc_and_point_activations(Context& ctx, ActivationTensors* acts, size_t*
     acts->losses = createTensor(ctx, Shape{act_sizes[22]}, kf32);
 }
 
-struct GPUParameters {
-    Tensor data[NUM_PARAMETER_TENSORS];
-};
-
-struct GPUActivations {
-    Tensor data[NUM_ACTIVATION_TENSORS];
-};
-
-
 void gpu_alloc(Context& ctx, Tensor* tensors, size_t* sizes, size_t n) {
     for (size_t i = 0; i < n; i++) {
         tensors[i] = createTensor(ctx, Shape{sizes[i]}, kf32);
@@ -325,7 +346,7 @@ void gpt2_build_from_checkpoint(Context& ctx, GPT2 *model, const char* checkpoin
     model->num_parameters = num_parameters;
 
     // read in all the parameters from file
-    malloc_and_point_parameters(ctx, &model->params, model->param_sizes);
+    malloc_and_point_parameters(ctx, model->config, &model->params, model->param_sizes);
     model->params_memory = (float*)mallocCheck(num_parameters * sizeof(float));
     freadCheck(model->params_memory, sizeof(float), num_parameters, model_file);
     fcloseCheck(model_file);
@@ -428,7 +449,7 @@ void gpt2_forward(Context& ctx, GPT2 *model, Tensor& inputs, Tensor& targets, si
     printf("num_activations: %zu\n", num_activations);
     model->num_activations = num_activations;
     printf("Allocating %.2f MB for activations\n", num_activations * sizeof(float) / (1024.0f * 1024.0f));
-    malloc_and_point_activations(ctx, &model->acts, model->act_sizes);
+    malloc_and_point_activations(ctx, model->config, &model->acts, model->act_sizes);
     // also create memory for caching inputs and targets
     //model->inputs = (int*)mallocCheck(B * T * sizeof(int));
     //model->targets = (int*)mallocCheck(B * T * sizeof(int)); // might be unused if we never have targets but it's small
@@ -664,8 +685,8 @@ void gpt2_backward(Context& ctx, GPT2 *model) {
     // lazily allocate the memory for gradients of the weights and activations, if needed
     if (model->grads_memory == NULL) {
         printf("Allocating %.2f MB for gradients\n", model->num_parameters * sizeof(float) / (1024.0f * 1024.0f));
-        malloc_and_point_parameters(ctx, &model->grads, model->param_sizes);
-        malloc_and_point_activations(ctx, &model->grads_acts, model->act_sizes);
+        malloc_and_point_parameters(ctx, model->config, &model->grads, model->param_sizes);
+        malloc_and_point_activations(ctx, model->config, &model->grads_acts, model->act_sizes);
         gpt2_zero_grad(model);
     }
 
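Why the macro had to go, in numbers: each per-layer slice is computed as the flattened total divided by L, so a hard-coded L of 12 mis-slices any checkpoint with a different depth. A small self-contained check, using GPT-2 medium's published shape (24 layers, C = 1024) as illustrative values:

#include <cassert>
#include <cstddef>

int main() {
    // Illustrative values: GPT-2 medium has 24 layers with C = 1024,
    // so the flattened ln1w block holds num_layers * C floats.
    size_t num_layers = 24, C = 1024;
    size_t ln1w_total = num_layers * C;

    // Old scheme: dividing by a fixed NUM_PARAMETER_LAYERS of 12
    // would hand each layer twice as many floats as it owns.
    assert(ln1w_total / 12 != C);

    // New scheme: dividing by config.num_layers recovers the true
    // per-layer size for any GPT-2 variant.
    assert(ln1w_total / num_layers == C);
    return 0;
}

Threading GPT2Config through both allocators (rather than reading a global) keeps the layer count tied to the checkpoint that gpt2_build_from_checkpoint actually loaded.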