Update benchmark_triton.py: import GEMLITE_ACC_DTYPE and set the FP16 entry to DType.FP32

mobicham · web-flow · commit 6a2035395c90 · 2025-01-21T14:58:11.000+01:00
diff --git a/examples/benchmark_triton.py b/examples/benchmark_triton.py
@@ -16,9 +16,12 @@
     pass
 
 #GemLite
-from gemlite.core import GemLiteLinearTriton, DType, set_autotune
+from gemlite.core import GemLiteLinearTriton, DType, set_autotune, GEMLITE_ACC_DTYPE
 set_autotune({'GEMV_REVSPLITK':True, 'GEMV_SPLITK': True, 'GEMV':True, 'GEMM_SPLITK':True, 'GEMM':True}, exhaustive=True, use_cuda_graph=False)
 
+GEMLITE_ACC_DTYPE[DType.FP16] = DType.FP32 #Setting used for A100/H100
+#GEMLITE_ACC_DTYPE[DType.FP16] = DType.FP16 #Alternative for 3090/4090: uncomment this line and comment out the one above
+
 device = 'cuda:0'
 compute_dtype = torch.float16