diff --git a/requirements.txt b/requirements.txt
index 9fe0fbf59814..99fe8fcb8cbe 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -42,7 +42,7 @@ optimum
 datasets==3.1.0
 evaluate
 mpmath>=1.3.0
-click
+click>=8.3.1,<8.4
 click_option_group
 aenum
 pyzmq
diff --git a/tests/unittest/_torch/multimodal/test_mm_encoder_standalone.py b/tests/unittest/_torch/multimodal/test_mm_encoder_standalone.py
index afedd960f9c3..221f9daafb8a 100644
--- a/tests/unittest/_torch/multimodal/test_mm_encoder_standalone.py
+++ b/tests/unittest/_torch/multimodal/test_mm_encoder_standalone.py
@@ -37,6 +37,16 @@
 _FAKE_QWEN3_VL_30B_A3B_FP8_SENTINEL = "qwen3_vl_30b_a3b_fp8_fake"
 _FAKE_CHECKPOINT_MARKER = ".tllm_fake_checkpoint"
 _MAX_MODEL_INIT_WORKERS = 3
+_MODEL_DIR_PARAMS = [
+    pytest.param(_LLAVA_DIR, id="llava_7b"),
+    pytest.param(_QWEN_2_5_VL_DIR, id="qwen2.5_3b"),
+    pytest.param(_QWEN_3_VL_DIR, id="qwen3_2b"),
+    pytest.param(_FAKE_QWEN3_VL_30B_A3B_FP8_SENTINEL, id="qwen3_30b_a3b_fp8"),
+]
+_PD_DISAGG_PARAMS = [
+    pytest.param(False, id="no_pd_disagg"),
+    pytest.param(True, id="pd_disagg"),
+]
 
 
 def _instantiate_models(*initializers):
@@ -259,21 +269,13 @@ def test_kv_event_mm_keys_with_reuse(prompts, expected_num_duplicates):
             f"got {num_duplicates}. Offsets: {mm_keys_offsets}")
 
 
-@pytest.fixture(
-    scope="module",
-    params=[
-        pytest.param(_LLAVA_DIR, id="llava_7b"),
-        pytest.param(_QWEN_2_5_VL_DIR, id="qwen2.5_3b"),
-        pytest.param(_QWEN_3_VL_DIR, id="qwen3_2b"),
-        pytest.param(_FAKE_QWEN3_VL_30B_A3B_FP8_SENTINEL,
-                     id="qwen3_30b_a3b_fp8"),
-    ],
-)
+@pytest.fixture(scope="module")
 def model_dir(request, tmp_path_factory: pytest.TempPathFactory) -> Path:
-    if request.param == _FAKE_QWEN3_VL_30B_A3B_FP8_SENTINEL:
+    model_dir_param = getattr(request, "param", _LLAVA_DIR)
+    if model_dir_param == _FAKE_QWEN3_VL_30B_A3B_FP8_SENTINEL:
         return _create_fake_qwen3_vl_30b_a3b_fp8_dir(tmp_path_factory,
                                                      _QWEN_3_VL_DIR)
-    return request.param
+    return model_dir_param
 
 
 @pytest.mark.parametrize(
@@ -631,13 +633,9 @@ def test_kv_event_mm_keys_with_very_long_uuid():
         f"found {len(matching)} times in {mm_key_hashes}")
 
 
-@pytest.fixture(scope="module",
-                params=[
-                    pytest.param(False, id="no_pd_disagg"),
-                    pytest.param(True, id="pd_disagg"),
-                ])
+@pytest.fixture(scope="module")
 def pd_disagg(request) -> bool:
-    return request.param
+    return getattr(request, "param", False)
 
 
 @pytest.fixture(scope="module")
@@ -778,6 +776,8 @@ def _assert_handles_are_different(x: dict | None, y: dict | None) -> None:
         assert x[key] != y[key]
 
 
+@pytest.mark.parametrize("model_dir", _MODEL_DIR_PARAMS, indirect=True)
+@pytest.mark.parametrize("pd_disagg", _PD_DISAGG_PARAMS, indirect=True)
 @pytest.mark.threadleak(enabled=False)
 def test_single_request_chat_multiple_images(
     pd_disagg: bool,
diff --git a/tests/unittest/conftest.py b/tests/unittest/conftest.py
index 6f14ba5ad023..3d425892dd93 100644
--- a/tests/unittest/conftest.py
+++ b/tests/unittest/conftest.py
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2022-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-FileCopyrightText: Copyright (c) 2022-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -337,12 +337,12 @@ def torch_empty_cache() -> None:
         torch.cuda.empty_cache()
 
 
-@pytest.fixture(scope="module", params=[2, 4, 8])
+@pytest.fixture(scope="module")
 def mpi_pool_executor(request):
     """
     Start an MPIPoolExecutor with `request.param` workers.
     """
-    num_workers = request.param
+    num_workers = getattr(request, "param", 2)
     with MPIPoolExecutor(num_workers) as executor:
         # make the number of workers visible to tests
         setattr(executor, "num_workers", num_workers)
diff --git a/tests/unittest/llmapi/test_llm_pytorch.py b/tests/unittest/llmapi/test_llm_pytorch.py
index ece8990f955c..f874cc4c8e60 100644
--- a/tests/unittest/llmapi/test_llm_pytorch.py
+++ b/tests/unittest/llmapi/test_llm_pytorch.py
@@ -67,7 +67,7 @@
 
 
 @force_ampere
-@pytest.mark.parametrize("enable_chunked_prefill,", [False, True])
+@pytest.mark.parametrize("enable_chunked_prefill", [False, True])
 @pytest.mark.part2
 def test_tinyllama_logits_processor(enable_chunked_prefill):
     tinyllama_logits_processor_test_harness(