NVIDIA · cascade812 · Jun 14, 2026 · Jun 15, 2026 · Jun 15, 2026 · Jun 15, 2026
@@ -42,7 +42,7 @@ optimum
 datasets==3.1.0
 evaluate
 mpmath>=1.3.0
-click
+click>=8.3.1,<8.4
 click_option_group
 aenum
 pyzmq

@@ -37,6 +37,17 @@
 _FAKE_QWEN3_VL_30B_A3B_FP8_SENTINEL = "qwen3_vl_30b_a3b_fp8_fake"
 _FAKE_CHECKPOINT_MARKER = ".tllm_fake_checkpoint"
 _MAX_MODEL_INIT_WORKERS = 3
+_MODEL_DIR_PARAMS = [
+    pytest.param(_LLAVA_DIR, id="llava_7b"),
+    pytest.param(_QWEN_2_5_VL_DIR, id="qwen2.5_3b"),
+    pytest.param(_QWEN_3_VL_DIR, id="qwen3_2b"),
+    pytest.param(_FAKE_QWEN3_VL_30B_A3B_FP8_SENTINEL,
+                 id="qwen3_30b_a3b_fp8"),
+]
+_PD_DISAGG_PARAMS = [
+    pytest.param(False, id="no_pd_disagg"),
+    pytest.param(True, id="pd_disagg"),
+]
 
 
 def _instantiate_models(*initializers):
@@ -259,21 +270,13 @@ def test_kv_event_mm_keys_with_reuse(prompts, expected_num_duplicates):
         f"got {num_duplicates}. Offsets: {mm_keys_offsets}")
 
 
-@pytest.fixture(
-    scope="module",
-    params=[
-        pytest.param(_LLAVA_DIR, id="llava_7b"),
-        pytest.param(_QWEN_2_5_VL_DIR, id="qwen2.5_3b"),
-        pytest.param(_QWEN_3_VL_DIR, id="qwen3_2b"),
-        pytest.param(_FAKE_QWEN3_VL_30B_A3B_FP8_SENTINEL,
-                     id="qwen3_30b_a3b_fp8"),
-    ],
-)
+@pytest.fixture(scope="module")
 def model_dir(request, tmp_path_factory: pytest.TempPathFactory) -> Path:
-    if request.param == _FAKE_QWEN3_VL_30B_A3B_FP8_SENTINEL:
+    model_dir_param = getattr(request, "param", _LLAVA_DIR)
+    if model_dir_param == _FAKE_QWEN3_VL_30B_A3B_FP8_SENTINEL:
         return _create_fake_qwen3_vl_30b_a3b_fp8_dir(tmp_path_factory,
                                                      _QWEN_3_VL_DIR)
-    return request.param
+    return model_dir_param
 
 
 @pytest.mark.parametrize(
@@ -631,13 +634,9 @@ def test_kv_event_mm_keys_with_very_long_uuid():
             f"found {len(matching)} times in {mm_key_hashes}")
 
 
-@pytest.fixture(scope="module",
-                params=[
-                    pytest.param(False, id="no_pd_disagg"),
-                    pytest.param(True, id="pd_disagg"),
-                ])
+@pytest.fixture(scope="module")
 def pd_disagg(request) -> bool:
-    return request.param
+    return getattr(request, "param", False)
 
 
 @pytest.fixture(scope="module")
@@ -778,6 +777,8 @@ def _assert_handles_are_different(x: dict | None, y: dict | None) -> None:
         assert x[key] != y[key]
 
 
+@pytest.mark.parametrize("model_dir", _MODEL_DIR_PARAMS, indirect=True)
+@pytest.mark.parametrize("pd_disagg", _PD_DISAGG_PARAMS, indirect=True)
 @pytest.mark.threadleak(enabled=False)
 def test_single_request_chat_multiple_images(
     pd_disagg: bool,

diff --git a/tests/unittest/conftest.py b/tests/unittest/conftest.py
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2022-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-FileCopyrightText: Copyright (c) 2022-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -337,12 +337,12 @@ def torch_empty_cache() -> None:
         torch.cuda.empty_cache()
 
 
-@pytest.fixture(scope="module", params=[2, 4, 8])
+@pytest.fixture(scope="module")
 def mpi_pool_executor(request):
     """
-    Start an MPIPoolExecutor with `request.param` workers.
+    Start an MPIPoolExecutor with `request.param` workers (default: 2).
     """
-    num_workers = request.param
+    num_workers = getattr(request, "param", 2)
     with MPIPoolExecutor(num_workers) as executor:
         # make the number of workers visible to tests
         setattr(executor, "num_workers", num_workers)

diff --git a/tests/unittest/llmapi/test_llm_pytorch.py b/tests/unittest/llmapi/test_llm_pytorch.py
@@ -67,7 +67,7 @@
 
 
 @force_ampere
-@pytest.mark.parametrize("enable_chunked_prefill,", [False, True])
+@pytest.mark.parametrize("enable_chunked_prefill", [False, True])
 @pytest.mark.part2
 def test_tinyllama_logits_processor(enable_chunked_prefill):
     tinyllama_logits_processor_test_harness(