Commit ee1cc26
shubhagr-qc authored and qcdipankar committed
Fixes in QNN Compilation path. (#454)

1. Fix generate_qnn_specialization to generate correct custom IO for VLM multimodal tests.
2. Add --target_backend AIC as a default parameter in the QNN converter.
3. Add a QNN Multimodal Tests stage in Jenkins.

Signed-off-by: Shubham Agrawal <[email protected]>
1 parent 1fab2bc commit ee1cc26

File tree: 4 files changed, +125 −44 lines

QEfficient/utils/constants.py

Lines changed: 1 addition & 1 deletion
```diff
@@ -138,7 +138,7 @@ class QnnConstants:
     # Converter Arguments
     FLOAT_BITWIDTH = 16
     FLOAT_BIAS_BITWIDTH = 32
-    CONVERTER_DEFAULT_ARGS = "--preserve_io_datatype --onnx_skip_simplification "
+    CONVERTER_DEFAULT_ARGS = "--preserve_io_datatype --onnx_skip_simplification --target_backend AIC "
 
     # Context-Binary-Generator Arguments
     LOG_LEVEL = "error"
```
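For context, a minimal sketch of how the widened default might be consumed when a converter command is assembled. `build_converter_cmd`, the tool name, and the `--input_network`/`--output_path` flags are illustrative assumptions, not the repository's actual compile path:

```python
# Illustrative only: `build_converter_cmd` is a hypothetical helper; the tool
# name and I/O flags are assumptions, not QEfficient's real compile path.
CONVERTER_DEFAULT_ARGS = "--preserve_io_datatype --onnx_skip_simplification --target_backend AIC "


def build_converter_cmd(converter_tool: str, onnx_path: str, output_path: str) -> str:
    # With --target_backend AIC baked into the defaults, every conversion
    # targets the AIC backend unless the caller overrides the argument string.
    return f"{converter_tool} --input_network {onnx_path} --output_path {output_path} {CONVERTER_DEFAULT_ARGS}"


print(build_converter_cmd("qnn-onnx-converter", "model.onnx", "qnn/model.cpp"))
```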

QEfficient/utils/generate_qnn_network_specialization_config.py

Lines changed: 28 additions & 39 deletions
```diff
@@ -66,55 +66,44 @@ def generate_qnn_specialization(
                 raise AttributeError(f"ERROR: {input_shape} Shape not Found")
             shapes.append(shape)
 
-        # Filling shape value for nodes with shape size != 2, example: past_key / past_value nodes.
-        if len(shapes) != 2:
+        shape_list = []
+        prefill_decode_shapes = False
+        if len(specializations) > 1 and (node.name in ["input_ids", "position_ids"]):
+            prefill_decode_shapes = True
+        for input_shape in shapes:
+            # If shape contains the parameter string, it value is extracted from the specialization file.
+            if isinstance(input_shape, str):
+                if input_shape in specializations[0]:
+                    shape_list.append(int(specializations[0][input_shape]))
+                    if (
+                        not prefill_decode_shapes
+                        and len(specializations) > 1
+                        and input_shape in specializations[1]
+                        and specializations[0][input_shape] != specializations[1][input_shape]
+                    ):
+                        prefill_decode_shapes = True
+                else:
+                    raise AttributeError(f"ERROR: {input_shape} is required in specializations")
+            # If shape contains the value, then that value is used as it is.
+            else:
+                shape_list.append(input_shape)
+        # Calculated shape is now assigned to the input node.
+        input_info["Shape"] = str(shape_list).replace("[", "(").replace("]", ")")
+
+        if prefill_decode_shapes:
             shape_list = []
             for input_shape in shapes:
                 # If shape contains the parameter string, it value is extracted from the specialization file.
                 if isinstance(input_shape, str):
-                    if input_shape in specializations[0]:
-                        shape_list.append(int(specializations[0][input_shape]))
+                    if input_shape in specializations[1]:
+                        shape_list.append(int(specializations[1][input_shape]))
                     else:
                         raise AttributeError(f"ERROR: {input_shape} is required in specializations")
                 # If shape contains the value, then that value is used as it is.
                 else:
                     shape_list.append(input_shape)
-
             # Calculated shape is now assigned to the input node.
-            input_info["Shape"] = str(shape_list).replace("[", "(").replace("]", ")")
-        # If shape value for nodes is with shape size == 2, example: input_ids, position_ids, etc.
-        else:
-            shape_list = []
-            for input_shape in shapes:
-                if isinstance(input_shape, str):
-                    if input_shape in specializations[0]:
-                        shape_list.append(int(specializations[0][input_shape]))
-                    else:
-                        raise AttributeError(f"ERROR: {input_shape} is required in specializations")
-                else:
-                    shape_list.append(input_shape)
-            # If specializations file contains more than one parameters list, then first list is used for prefill and second one for decode graph.
-            if len(specializations) > 1:
-                prefill_shape_list = shape_list
-                decode_shape_list = []
-                for input_shape in shapes:
-                    if isinstance(input_shape, str):
-                        if input_shape in specializations[1]:
-                            decode_shape_list.append(int(specializations[1][input_shape]))
-                        else:
-                            raise AttributeError(f"ERROR: {input_shape} is required in specializations")
-                    else:
-                        decode_shape_list.append(input_shape)
-
-                input_info["Shape"] = (
-                    str(prefill_shape_list).replace("[", "(").replace("]", ")")
-                    + ", "
-                    + str(decode_shape_list).replace("[", "(").replace("]", ")")
-                )
-
-            # If specializations file contains only one parameters list, then that list is used for decode graph information.
-            else:
-                input_info["Shape"] = str(shape_list).replace("[", "(").replace("]", ")")
+            input_info["Shape"] += ", " + str(shape_list).replace("[", "(").replace("]", ")")
 
         # Finally, input node is created with its name, and desired model parameters {DataType, Shape}
         input_nodes_info.append({"Name": node.name, "Desired Model Parameters": input_info})
```
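The rewritten logic no longer keys on shape rank: it builds the prefill shape first, then appends a decode shape only when `prefill_decode_shapes` is set, either because the node is `input_ids`/`position_ids` or because some dimension parameter differs between the two specializations. A self-contained sketch of that logic with a worked example (the decode pass is condensed into a comprehension and assumes the parameter exists in `specializations[1]`):

```python
def resolve_shape(name, shapes, specializations):
    """Mirror of the new shape-resolution logic in generate_qnn_specialization."""
    shape_list = []
    prefill_decode_shapes = False
    # input_ids / position_ids always get both prefill and decode shapes.
    if len(specializations) > 1 and name in ["input_ids", "position_ids"]:
        prefill_decode_shapes = True
    for dim in shapes:
        if isinstance(dim, str):
            if dim not in specializations[0]:
                raise AttributeError(f"ERROR: {dim} is required in specializations")
            shape_list.append(int(specializations[0][dim]))
            # A parameter that differs between prefill and decode also forces
            # a second shape to be emitted.
            if (
                not prefill_decode_shapes
                and len(specializations) > 1
                and dim in specializations[1]
                and specializations[0][dim] != specializations[1][dim]
            ):
                prefill_decode_shapes = True
        else:
            shape_list.append(dim)
    shape = str(shape_list).replace("[", "(").replace("]", ")")
    if prefill_decode_shapes:
        decode = [int(specializations[1][d]) if isinstance(d, str) else d for d in shapes]
        shape += ", " + str(decode).replace("[", "(").replace("]", ")")
    return shape


prefill = {"batch_size": 1, "seq_len": 32, "ctx_len": 128}
decode = {"batch_size": 1, "seq_len": 1, "ctx_len": 128}
# past_key/past_value style node: all parameters agree -> one shape.
print(resolve_shape("past_key.0", [1, 8, "ctx_len", 64], [prefill, decode]))    # (1, 8, 128, 64)
# input_ids: prefill and decode shapes are emitted together.
print(resolve_shape("input_ids", ["batch_size", "seq_len"], [prefill, decode]))  # (1, 32), (1, 1)
```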

scripts/Jenkinsfile

Lines changed: 21 additions & 2 deletions
```diff
@@ -106,7 +106,7 @@ pipeline {
         stage('vLLM Tests') {
             steps {
                 catchError(buildResult: 'FAILURE', stageResult: 'FAILURE') {
-                    build job: 'qefficient_vllm_upstream',
+                    build job: 'qefficient_vllm_upstream',
                     parameters: [string(name: 'NAME', value: "${BUILD_TAG}")],
                     propagate: true,
                     wait: true
@@ -144,13 +144,32 @@ pipeline {
                        mkdir -p $PWD/Qnn_non_cli &&
                        export TOKENIZERS_PARALLELISM=false &&
                        export QEFF_HOME=$PWD/Qnn_non_cli &&
-                       pytest tests -m '(not cli) and (qnn) and (on_qaic)' --ignore tests/vllm --junitxml=tests/tests_log5.xml &&
+                       pytest tests -m '(not cli) and (qnn) and (on_qaic) and (not multimodal)' --ignore tests/vllm --junitxml=tests/tests_log5.xml &&
                        junitparser merge tests/tests_log5.xml tests/tests_log.xml &&
                        deactivate"
                    '''
                }
            }
        }
+        stage('QNN MultiModal Tests') {
+            steps {
+                timeout(time: 60, unit: 'MINUTES') {
+                    sh '''
+                    sudo docker exec ${BUILD_TAG} bash -c "
+                    source /qnn_sdk/bin/envsetup.sh &&
+                    source /qnn_sdk/bin/envcheck -c &&
+                    cd /efficient-transformers &&
+                    . preflight_qeff/bin/activate &&
+                    mkdir -p $PWD/Non_cli_qnn_multimodal &&
+                    export TOKENIZERS_PARALLELISM=false &&
+                    export QEFF_HOME=$PWD/Non_cli_qnn_multimodal &&
+                    pytest tests -m '(not cli) and (on_qaic) and (multimodal) and (qnn)' --ignore tests/vllm -n 4 --junitxml=tests/tests_log7.xml &&
+                    junitparser merge tests/tests_log7.xml tests/tests_log.xml &&
+                    deactivate"
+                    '''
+                }
+            }
+        }
     }
 
     post {
```
tests/transformers/models/test_image_text_to_text_models.py

Lines changed: 75 additions & 2 deletions
```diff
@@ -5,8 +5,9 @@
 #
 # ----------------------------------------------------------------------------
 
+import os
 from io import BytesIO
-from typing import List
+from typing import List, Optional
 
 import pytest
 import requests
@@ -23,7 +24,8 @@
 
 from QEfficient.transformers.models.modeling_auto import QEFFAutoModelForCausalLM, QEFFAutoModelForImageTextToText
 from QEfficient.utils import hf_download
-from QEfficient.utils._utils import get_num_layers_vlm
+from QEfficient.utils._utils import create_json, get_num_layers_vlm
+from QEfficient.utils.constants import QnnConstants
 from QEfficient.utils.device_utils import get_available_device_id
 from QEfficient.utils.run_utils import ApiRunnerInternVL, ApiRunnerVlm
 from QEfficient.utils.test_utils import InternProcessor
@@ -198,6 +200,8 @@ def check_image_text_to_text_pytorch_vs_kv_vs_ort_vs_ai100(
     n_layer: int = 1,
     kv_offload: bool = False,
     num_devices: int = 1,
+    enable_qnn: Optional[bool] = False,
+    qnn_config: Optional[str] = None,
 ):
     model_config = {"model_name": model_name}
     model_config["img_size"] = img_size
@@ -259,6 +263,8 @@ def check_image_text_to_text_pytorch_vs_kv_vs_ort_vs_ai100(
         prefill_seq_len=prompt_len,
         ctx_len=ctx_len,
         mxfp6=False,
+        enable_qnn=enable_qnn,
+        qnn_config=qnn_config,
     )
     inputs = processor(images=image, text=prompt, return_tensors="pt")
     if "pixel_values" in inputs:
@@ -281,6 +287,8 @@ def check_intern_image_text_to_text_pytorch_vs_kv_vs_ort_vs_ai100(
     n_layer: int = 1,
     kv_offload: bool = False,
     num_devices: int = 1,
+    enable_qnn: Optional[bool] = False,
+    qnn_config: Optional[str] = None,
 ):
     model_config = {"model_name": model_name}
 
@@ -346,6 +354,8 @@ def check_intern_image_text_to_text_pytorch_vs_kv_vs_ort_vs_ai100(
         prefill_seq_len=prompt_len,
         ctx_len=ctx_len,
         mxfp6=False,
+        enable_qnn=enable_qnn,
+        qnn_config=qnn_config,
     )
     print("QPC Outputs (QAIC):")
     output = qeff_model.generate(inputs=inputs, generation_len=NEW_GENERATION_TOKENS, streamer=streamer)
@@ -381,6 +391,42 @@ def test_image_text_to_text_pytorch_vs_kv_vs_ort_vs_ai100(
     )
 
 
+@pytest.mark.on_qaic
+@pytest.mark.qnn
+@pytest.mark.multimodal
+@pytest.mark.parametrize(
+    "model_name, kv_offload, batch_size, prompt_len, ctx_len, img_size, img_url, query, n_layer", test_models_config
+)
+def test_image_text_to_text_pytorch_vs_kv_vs_ort_vs_ai100_qnn(
+    model_name, kv_offload, batch_size, prompt_len, ctx_len, img_size, img_url, query, n_layer
+):
+    """
+    Test function to validate the PyTorch model, the PyTorch model after KV changes, the ONNX model, and the Cloud AI 100 model, without continuous batching.
+    ``Mandatory`` Args:
+        :model_name (str): Hugging Face Model Card name, Example: ``gpt2``
+    """
+    if model_name == "meta-llama/Llama-4-Scout-17B-16E-Instruct":
+        pytest.skip("QNN is not supported for Llama 4 Scout models")
+
+    qnn_config_json_path = os.path.join(os.getcwd(), "qnn_config.json")
+    create_json(qnn_config_json_path, QnnConstants.QNN_SAMPLE_CONFIG)
+
+    check_image_text_to_text_pytorch_vs_kv_vs_ort_vs_ai100(
+        model_name=model_name,
+        prompt_len=prompt_len,
+        ctx_len=ctx_len,
+        max_gen_len=NEW_GENERATION_TOKENS,
+        img_size=img_size,
+        img_url=img_url,
+        query=query,
+        n_layer=n_layer,
+        batch_size=batch_size,
+        kv_offload=kv_offload,
+        enable_qnn=True,
+        qnn_config=qnn_config_json_path,
+    )
+
+
 @pytest.mark.on_qaic
 @pytest.mark.multimodal
 @pytest.mark.parametrize(
@@ -400,3 +446,30 @@ def test_image_text_to_text_intern_pytorch_vs_kv_vs_ort_vs_ai100(
         batch_size=batch_size,
         kv_offload=kv_offload,
     )
+
+
+@pytest.mark.on_qaic
+@pytest.mark.qnn
+@pytest.mark.multimodal
+@pytest.mark.parametrize(
+    "model_name, kv_offload, batch_size, prompt_len, ctx_len, img_url, query, n_layer", intern_model_config
+)
+def test_image_text_to_text_intern_pytorch_vs_kv_vs_ort_vs_ai100_qnn(
+    model_name, kv_offload, batch_size, prompt_len, ctx_len, img_url, query, n_layer
+):
+    qnn_config_json_path = os.path.join(os.getcwd(), "qnn_config.json")
+    create_json(qnn_config_json_path, QnnConstants.QNN_SAMPLE_CONFIG)
+
+    check_intern_image_text_to_text_pytorch_vs_kv_vs_ort_vs_ai100(
+        model_name=model_name,
+        prompt_len=prompt_len,
+        ctx_len=ctx_len,
+        max_gen_len=NEW_GENERATION_TOKENS,
+        img_url=img_url,
+        query=query,
+        n_layer=n_layer,
+        batch_size=batch_size,
+        kv_offload=kv_offload,
+        enable_qnn=True,
+        qnn_config=qnn_config_json_path,
+    )
```
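Both new tests materialize a QNN config on disk before compiling. A minimal stand-in for the helper, assuming `create_json` simply serializes a dict to a JSON file; the real helper in `QEfficient.utils._utils` and the actual contents of `QnnConstants.QNN_SAMPLE_CONFIG` may differ:

```python
import json
import os

# Hypothetical placeholder for QnnConstants.QNN_SAMPLE_CONFIG; the real
# constant is defined in QEfficient/utils/constants.py.
QNN_SAMPLE_CONFIG = {"converter_args_extension": "", "context_binary_generator_args_extension": ""}


def create_json(file_path: str, data: dict) -> None:
    # Serialize the QNN compilation config so its path can be passed via qnn_config.
    with open(file_path, "w") as f:
        json.dump(data, f, indent=4)


qnn_config_json_path = os.path.join(os.getcwd(), "qnn_config.json")
create_json(qnn_config_json_path, QNN_SAMPLE_CONFIG)
```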
