Skip to content

Commit 5acb38a

Browse files
committed
Add a Qwen3 MoE blockwise-FP8 accuracy test to Blackwell testing, covering the DeepGEMM and TRTLLM MoE backends
Signed-off-by: Jhao-Ting Chen <[email protected]>
1 parent c177aa8 commit 5acb38a

File tree

1 file changed

+32
-0
lines changed

1 file changed

+32
-0
lines changed

tests/integration/defs/accuracy/test_llm_api_pytorch.py

Lines changed: 32 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2859,6 +2859,38 @@ def test_fp8(self, tp_size, pp_size, ep_size, attention_dp, cuda_graph,
28592859
task = GSM8K(self.MODEL_NAME)
28602860
task.evaluate(llm)
28612861

2862+
@skip_pre_hopper
@pytest.mark.skip_less_device(8)
@pytest.mark.parametrize(
    "tp_size,pp_size,ep_size,attention_dp,cuda_graph,overlap_scheduler,moe_backend",
    [(8, 1, 8, True, True, True, "DEEPGEMM"),
     (8, 1, 8, False, True, True, "DEEPGEMM"),
     (8, 1, 8, True, True, True, "TRTLLM"),
     (8, 1, 8, False, True, True, "TRTLLM")],
    ids=[
        "latency_deepgemm", "throughput_latency_deepgemm", "latency_trtllm",
        "throughput_latency_trtllm"
    ])
def test_fp8_block_scales(self, tp_size, pp_size, ep_size, attention_dp,
                          cuda_graph, overlap_scheduler, moe_backend):
    """Accuracy test for the blockwise-FP8 Qwen3-235B-A22B checkpoint.

    Evaluates MMLU and GSM8K on an 8-GPU TP/EP deployment, parametrized
    over the MoE backend (DEEPGEMM or TRTLLM), attention data parallelism,
    CUDA-graph capture, and the overlap scheduler.
    """
    # CUDA-graph capture is optional; None leaves graphs disabled.
    graph_config = CudaGraphConfig() if cuda_graph else None
    backend_kwargs = {
        "disable_overlap_scheduler": not overlap_scheduler,
        "cuda_graph_config": graph_config,
        "moe_config": MoeConfig(backend=moe_backend),
    }

    # Cap the KV cache at 60% of free GPU memory to leave headroom for
    # activations of this large MoE model.
    kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.6)
    with LLM(f"{llm_models_root()}/Qwen3/Qwen3-235B-A22B-FP8",
             tensor_parallel_size=tp_size,
             pipeline_parallel_size=pp_size,
             moe_expert_parallel_size=ep_size,
             **backend_kwargs,
             enable_attention_dp=attention_dp,
             kv_cache_config=kv_cache_config) as llm:
        task = MMLU(self.MODEL_NAME)
        task.evaluate(llm)
        task = GSM8K(self.MODEL_NAME)
        task.evaluate(llm)
2893+
28622894
@skip_pre_blackwell
28632895
@pytest.mark.skip_less_mpi_world_size(8)
28642896
@pytest.mark.parametrize(

0 commit comments

Comments
 (0)