Avoid downloading HF models in CI pipelines (#1263)
baijumeswani authored Feb 20, 2025 · 1 parent 16fb079 · commit fba80c1
Showing 11 changed files with 67 additions and 62 deletions.
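The thrust of the change: CI no longer needs a dummy `HF_TOKEN`, because Hugging Face downloads in the Python test harness are now attempted only when `huggingface_hub` is installed and the target repo is actually reachable, and tests skip cleanly when a model is absent. A minimal sketch of that guard pattern, lifted from the `_test_utils.py` diff below (the helper name is hypothetical — the diff inlines this logic in `download_models` — and the logger is assumed to be a standard `logging.Logger`):

```python
import logging

log = logging.getLogger(__name__)

def is_hf_model_reachable(hf_name: str) -> bool:
    # Hypothetical helper; the commit inlines this logic in download_models().
    try:
        from huggingface_hub import model_info  # optional dependency in CI
        model_info(hf_name)  # raises if the repo is missing, gated, or unreachable
        return True
    except ImportError:
        log.warning("huggingface_hub is not installed. Skipping downloading hugging face models.")
    except Exception as e:
        log.warning(f"Error: {e}. Skipping downloading hugging face models")
    return False
```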
4 changes: 0 additions & 4 deletions .github/workflows/linux-cpu-x64-build.yml
@@ -84,10 +84,6 @@ jobs:
           python3 -m pip install -r test/python/cpu/ort/requirements.txt --user
           python3 -m pip install --user --no-index --no-deps --find-links build/cpu/wheel onnxruntime_genai
-      - name: Use Dummy HuggingFace Token
-        run: |
-          echo "HF_TOKEN=12345" >> $GITHUB_ENV
       - name: Verify Build Artifacts
         if: always()
         continue-on-error: true
4 changes: 0 additions & 4 deletions .github/workflows/linux-cpu-x64-nightly-build.yml
@@ -55,10 +55,6 @@ jobs:
           python3 -m pip install -r test/python/cpu/ort/requirements.txt --user
           python3 -m pip install build/cpu/wheel/onnxruntime_genai*.whl --no-deps
-      - name: Use Dummy HuggingFace Token
-        run: |
-          echo "HF_TOKEN=12345" >> $GITHUB_ENV
       - name: Run the python tests
         run: |
           python3 test/python/test_onnxruntime_genai.py --cwd test/python --test_models test/test_models --e2e
4 changes: 0 additions & 4 deletions .github/workflows/linux-gpu-x64-build.yml
@@ -109,10 +109,6 @@ jobs:
           bash -c " \
             /usr/bin/cmake --build --preset linux_gcc_cuda_release"
-      - name: Use Dummy HuggingFace Token
-        run: |
-          echo "HF_TOKEN=12345" >> $GITHUB_ENV
       - name: Install the onnxruntime-genai Python wheel and run python test
         run: |
           echo "Installing the onnxruntime-genai Python wheel and running the Python tests"
4 changes: 0 additions & 4 deletions .github/workflows/win-cpu-x64-build.yml
@@ -91,10 +91,6 @@ jobs:
           python3 -m pip install -r test\python\cpu\ort\requirements.txt --user
           python3 -m pip install (Get-ChildItem ("$env:binaryDir\wheel\*.whl")) --no-deps
-      - name: Use Dummy HuggingFace Token
-        run: |
-          Add-Content -Path $env:GITHUB_ENV -Value "HF_TOKEN=12345"
       - name: Run the Python Tests
         run: |
           python test/python/test_onnxruntime_genai.py --cwd "test\python" --test_models "test\test_models"
4 changes: 0 additions & 4 deletions .github/workflows/win-cuda-x64-build.yml
@@ -80,10 +80,6 @@ jobs:
           python -m pip install -r test\python\cuda\ort\requirements.txt
           python -m pip install (Get-ChildItem ("$env:binaryDir\wheel\*.whl")) --no-deps
-      - name: Use Dummy HuggingFace Token
-        run: |
-          Add-Content -Path $env:GITHUB_ENV -Value "HF_TOKEN=12345"
       - name: Run the Python Tests
         run: |
           python test/python/test_onnxruntime_genai.py --cwd "test\python" --test_models "test\test_models" --e2e
6 changes: 3 additions & 3 deletions examples/csharp/HelloPhi/HelloPhi.csproj
@@ -10,9 +10,9 @@
   </PropertyGroup>

   <ItemGroup>
-    <PackageReference Include="Microsoft.ML.OnnxRuntimeGenAI" Version="0.7.0-dev" Condition=" '$(Configuration)' == 'Debug' OR '$(Configuration)' == 'Release' " />
-    <PackageReference Include="Microsoft.ML.OnnxRuntimeGenAI.Cuda" Version="0.7.0-dev" Condition=" '$(Configuration)' == 'Debug_Cuda' OR '$(Configuration)' == 'Release_Cuda' " />
-    <PackageReference Include="Microsoft.ML.OnnxRuntimeGenAI.DirectML" Version="0.7.0-dev" Condition=" '$(Configuration)' == 'Debug_DirectML' OR '$(Configuration)' == 'Release_DirectML' " />
+    <PackageReference Include="Microsoft.ML.OnnxRuntimeGenAI" Version="0.6.0" Condition=" '$(Configuration)' == 'Debug' OR '$(Configuration)' == 'Release' " />
+    <PackageReference Include="Microsoft.ML.OnnxRuntimeGenAI.Cuda" Version="0.6.0" Condition=" '$(Configuration)' == 'Debug_Cuda' OR '$(Configuration)' == 'Release_Cuda' " />
+    <PackageReference Include="Microsoft.ML.OnnxRuntimeGenAI.DirectML" Version="0.6.0" Condition=" '$(Configuration)' == 'Debug_DirectML' OR '$(Configuration)' == 'Release_DirectML' " />
   </ItemGroup>

   <ItemGroup>
6 changes: 3 additions & 3 deletions examples/csharp/HelloPhi3V/HelloPhi3V.csproj
@@ -9,9 +9,9 @@
   </PropertyGroup>

   <ItemGroup>
-    <PackageReference Include="Microsoft.ML.OnnxRuntimeGenAI" Version="0.7.0-dev" Condition=" '$(Configuration)' == 'Debug' OR '$(Configuration)' == 'Release' " />
-    <PackageReference Include="Microsoft.ML.OnnxRuntimeGenAI.Cuda" Version="0.7.0-dev" Condition=" '$(Configuration)' == 'Debug_Cuda' OR '$(Configuration)' == 'Release_Cuda' " />
-    <PackageReference Include="Microsoft.ML.OnnxRuntimeGenAI.DirectML" Version="0.7.0-dev" Condition=" '$(Configuration)' == 'Debug_DirectML' OR '$(Configuration)' == 'Release_DirectML' " />
+    <PackageReference Include="Microsoft.ML.OnnxRuntimeGenAI" Version="0.6.0" Condition=" '$(Configuration)' == 'Debug' OR '$(Configuration)' == 'Release' " />
+    <PackageReference Include="Microsoft.ML.OnnxRuntimeGenAI.Cuda" Version="0.6.0" Condition=" '$(Configuration)' == 'Debug_Cuda' OR '$(Configuration)' == 'Release_Cuda' " />
+    <PackageReference Include="Microsoft.ML.OnnxRuntimeGenAI.DirectML" Version="0.6.0" Condition=" '$(Configuration)' == 'Debug_DirectML' OR '$(Configuration)' == 'Release_DirectML' " />
   </ItemGroup>

 </Project>
56 changes: 41 additions & 15 deletions test/python/_test_utils.py
@@ -53,33 +53,40 @@ def run_subprocess(


 def get_model_paths():
+    # TODO: Uncomment the following models as needed in the CI pipeline.
+
     hf_paths = {
-        "phi-2": "microsoft/phi-2",
-        "olmo": "amd/AMD-OLMo-1B-SFT-DPO",
-        "qwen": "Qwen/Qwen2.5-0.5B",
-        "phi-3.5": "microsoft/Phi-3.5-mini-instruct",
+        # "olmo": "amd/AMD-OLMo-1B-SFT-DPO",
+        "qwen-2.5": "Qwen/Qwen2.5-0.5B",
+        # "phi-3.5": "microsoft/Phi-3.5-mini-instruct",
+        # "llama-3.2": "meta-llama/Llama-3.2-1B-instruct",
-        "granite-3.0": "ibm-granite/granite-3.0-2b-instruct",
+        # "granite-3.0": "ibm-granite/granite-3.0-2b-instruct",
     }

-    ci_data_path = os.path.join("/", "data", "ortgenai", "pytorch")
+    ci_data_path = None
+    if is_windows():
+        ci_data_path = os.path.join(R"C:\\", "data", "models", "ortgenai", "pytorch")
+    else:
+        ci_data_path = os.path.join(os.path.abspath(os.sep), "data", "ortgenai", "pytorch")
+
+    if not os.path.exists(ci_data_path):
+        return {}, hf_paths
+
+    # Note: If a model has over 4B parameters, please add a quantized version
+    # to `ci_paths` instead of `hf_paths` to reduce file size and testing time.
     ci_paths = {
-        "llama-2": os.path.join(ci_data_path, "Llama-2-7B-Chat-GPTQ"),
-        "llama-3": os.path.join(ci_data_path, "Meta-Llama-3-8B-AWQ"),
-        "mistral-v0.2": os.path.join(ci_data_path, "Mistral-7B-Instruct-v0.2-GPTQ"),
+        # "llama-2": os.path.join(ci_data_path, "Llama-2-7B-Chat-GPTQ"),
+        # "llama-3": os.path.join(ci_data_path, "Meta-Llama-3-8B-AWQ"),
+        # "mistral-v0.2": os.path.join(ci_data_path, "Mistral-7B-Instruct-v0.2-GPTQ"),
         "phi-2": os.path.join(ci_data_path, "phi2"),
-        "gemma-2b": os.path.join(ci_data_path, "gemma-1.1-2b-it"),
-        "gemma-7b": os.path.join(ci_data_path, "gemma-7b-it-awq"),
-        "phi-3-mini": os.path.join(ci_data_path, "phi3-mini-128k-instruct"),
-        "gemma-2-2b": os.path.join(ci_data_path, "gemma-2-2b-it"),
-        "llama-3.2": os.path.join(ci_data_path, "llama-3.2b-1b-instruct"),
+        # "gemma-2b": os.path.join(ci_data_path, "gemma-1.1-2b-it"),
+        # "gemma-7b": os.path.join(ci_data_path, "gemma-7b-it-awq"),
+        # "phi-3-mini": os.path.join(ci_data_path, "phi3-mini-128k-instruct"),
+        # "gemma-2-2b": os.path.join(ci_data_path, "gemma-2-2b-it"),
+        # "llama-3.2": os.path.join(ci_data_path, "llama-3.2b-1b-instruct"),
         "qwen-2.5": os.path.join(ci_data_path, "qwen2.5-0.5b-instruct"),
-        "nemotron-mini": os.path.join(ci_data_path, "nemotron-mini-4b"),
+        # "nemotron-mini": os.path.join(ci_data_path, "nemotron-mini-4b"),
     }

     return ci_paths, hf_paths
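With the rework above, `get_model_paths` degrades gracefully on machines without the CI data mount: it returns an empty `ci_paths` and only the trimmed Hugging Face list. A small illustrative check (hypothetical driver code, not part of the repo):

```python
ci_paths, hf_paths = get_model_paths()

# On a runner without /data/ortgenai/pytorch (or C:\data\models\ortgenai\pytorch
# on Windows), ci_paths is {} and only hosted models remain candidates.
if not ci_paths:
    print(f"CI model cache not mounted; {len(hf_paths)} Hugging Face candidates remain.")
```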
@@ -123,22 +123,41 @@ def download_model(model_name, input_path, output_path, precision, device, one_l
     run_subprocess(command).check_returncode()


-def download_models(download_path, precision, device):
+def download_models(download_path, precision, device, log):
+    log.debug(f"Downloading models to {download_path} with precision {precision} and device {device}")
+
     ci_paths, hf_paths = get_model_paths()
     output_paths = []

+    log.debug(f"Downloading {len(ci_paths)} PyTorch models and {len(hf_paths)} Hugging Face models")
+
     # python -m onnxruntime_genai.models.builder -i <input_path> -o <output_path> -p <precision> -e <device>
     for model_name, input_path in ci_paths.items():
         output_path = os.path.join(download_path, model_name, precision, device)
+        log.debug(f"Downloading {model_name} from {input_path} to {output_path}")
         if not os.path.exists(output_path):
             download_model(None, input_path, output_path, precision, device)
         output_paths.append(output_path)

     # python -m onnxruntime_genai.models.builder -m <model_name> -o <output_path> -p <precision> -e <device>
     for model_name, hf_name in hf_paths.items():
+        try:
+            from huggingface_hub import model_info
+            model_info(hf_name)
+        except ImportError:
+            log.warning("huggingface_hub is not installed. Skipping downloading hugging face models.")
+            continue
+        except Exception as e:
+            log.warning(f"Error: {e}. Skipping downloading hugging face models")
+            continue
         output_path = os.path.join(download_path, model_name, precision, device)
+
+        log.debug(f"Downloading {model_name} from {hf_name} to {output_path}")
+
         if not os.path.exists(output_path):
             download_model(hf_name, "", output_path, precision, device)
         output_paths.append(output_path)

+    log.info(f"Successfully downloaded {len(output_paths)} models")
+
     return output_paths
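A sketch of driving the new signature, assuming a standard `logging` logger (the added `log` parameter is the only interface change; the callers in `test_onnxruntime_genai.py` below pass their existing logger):

```python
import logging

logging.basicConfig(level=logging.DEBUG)
log = logging.getLogger("ortgenai-tests")

# Models that cannot be reached (no huggingface_hub, offline runner, gated
# repo) are skipped with a warning instead of failing the CI job.
output_paths = download_models("/tmp/test_models", "int4", "cpu", log)
log.info(f"Models available for testing: {output_paths}")
```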
15 changes: 14 additions & 1 deletion test/python/conftest.py
@@ -19,7 +19,10 @@ def pytest_addoption(parser):


 def get_path_for_model(data_path, model_name, precision, device):
-    return os.path.join(data_path, model_name, precision, device)
+    model_path = os.path.join(data_path, model_name, precision, device)
+    if not os.path.exists(model_path):
+        pytest.skip(f"Model {model_name} not found at {model_path}")
+    return model_path


 @pytest.fixture
@@ -52,6 +55,16 @@ def llama_for(request):
     )


+@pytest.fixture
+def qwen_for(request):
+    return functools.partial(
+        get_path_for_model,
+        request.config.getoption("--test_models"),
+        "qwen-2.5",
+        "int4",
+    )
+
+
 @pytest.fixture
 def path_for_model(request):
     return functools.partial(
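Taken together, the `pytest.skip` guard in `get_path_for_model` and the new `qwen_for` fixture let a test request a model path and be skipped automatically when that model was never downloaded. A hypothetical test using the fixture (assuming `onnxruntime_genai` is imported as `og`, as elsewhere in this suite):

```python
def test_loads_qwen(qwen_for):
    # qwen_for("cpu") resolves to <--test_models>/qwen-2.5/int4/cpu,
    # or skips this test if that directory does not exist.
    model = og.Model(qwen_for("cpu"))
    assert model is not None
```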
6 changes: 3 additions & 3 deletions test/python/test_onnxruntime_genai.py
@@ -84,11 +84,11 @@ def main():
     if not (
         sysconfig.get_platform().endswith("arm64") or sys.version_info.minor < 8
     ):
-        output_paths += download_models(os.path.abspath(args.test_models), "int4", "cpu")
+        output_paths += download_models(os.path.abspath(args.test_models), "int4", "cpu", log)
     if og.is_cuda_available():
-        output_paths += download_models(os.path.abspath(args.test_models), "int4", "cuda")
+        output_paths += download_models(os.path.abspath(args.test_models), "int4", "cuda", log)
     if og.is_dml_available():
-        output_paths += download_models(os.path.abspath(args.test_models), "int4", "dml")
+        output_paths += download_models(os.path.abspath(args.test_models), "int4", "dml", log)

     # Run ONNX Runtime GenAI tests
     run_onnxruntime_genai_api_tests(os.path.abspath(args.cwd), log, os.path.abspath(args.test_models))
20 changes: 3 additions & 17 deletions test/python/test_onnxruntime_genai_api.py
@@ -391,23 +391,9 @@ def test_get_output(test_data_path, relative_model_path):
     sysconfig.get_platform().endswith("arm64") or sys.version_info.minor < 8,
     reason="Python 3.8 is required for downloading models.",
 )
-@pytest.mark.parametrize(
-    "relative_model_path",
-    (
-        [
-            Path("qwen/int4/cpu"),
-            Path("qwen/int4/cuda"),
-        ]
-        if og.is_cuda_available()
-        else [
-            Path("qwen/int4/cpu"),
-        ]
-    ),
-)
-def test_hidden_states(test_data_path, relative_model_path):
-    model_path = os.fspath(Path(test_data_path) / relative_model_path)
-
-    model = og.Model(model_path)
+@pytest.mark.parametrize("device", devices)
+def test_hidden_states(qwen_for, device):
+    model = og.Model(qwen_for(device))

     search_params = og.GeneratorParams(model)
     input_ids = np.array(
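The `devices` list used in the new parametrization is defined outside this hunk; presumably it enumerates the available execution providers. A hedged sketch of one way it could be built (an assumption, not the file's actual definition — though `og.is_cuda_available()` and `og.is_dml_available()` do appear elsewhere in this commit):

```python
import onnxruntime_genai as og

# Hypothetical: always test CPU, add accelerators when present.
devices = ["cpu"]
if og.is_cuda_available():
    devices.append("cuda")
if og.is_dml_available():
    devices.append("dml")
```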
