Skip to content

Commit cd4cece

Browse files
Measure the duration of matmul_forward_dummy so that the CPU time can be compared with the GPU time
1 parent 16a0dbc commit cd4cece

File tree

1 file changed

+5
-2
lines changed

1 file changed

+5
-2
lines changed

experimental/kernels/unittest_llmc/unittest_kernels.cpp

+5-2
Original file line numberDiff line numberDiff line change
@@ -327,7 +327,10 @@ void MATMUL_FORWARD_GPU(float* out,
327327
}
328328
if (debug) {
329329
out_exp = new float[B*T*OC];
330-
matmul_forward_dummy(out_exp, inp, weight, bias, B, T, C, OC);
330+
{
331+
DurationTime duration("matmul_forward_cpu", verbose);
332+
matmul_forward_dummy(out_exp, inp, weight, bias, B, T, C, OC);
333+
}
331334
}
332335
struct MatmulParams {
333336
uint32_t B;
@@ -421,8 +424,8 @@ void MATMUL_FORWARD_GPU(float* out,
421424
DurationTime duration("matmul_forward_gpu", verbose);
422425
dispatchKernel(ctx, op, promise);
423426
wait(ctx, future);
424-
toCPU(ctx, out_o, out, b * t * oc * sizeof(float));
425427
}
428+
toCPU(ctx, out_o, out, b * t * oc * sizeof(float));
426429
} else {
427430
DurationTime duration("matmul_forward_cpu", verbose);
428431
matmul_forward_dummy(out, inp, weight, bias, B, T, C, OC);

0 commit comments

Comments
 (0)