@@ -5,8 +5,9 @@
 #
 # ----------------------------------------------------------------------------
 
+import os
 from io import BytesIO
-from typing import List
+from typing import List, Optional
 
 import pytest
 import requests
@@ -23,7 +24,8 @@
 
 from QEfficient.transformers.models.modeling_auto import QEFFAutoModelForCausalLM, QEFFAutoModelForImageTextToText
 from QEfficient.utils import hf_download
-from QEfficient.utils._utils import get_num_layers_vlm
+from QEfficient.utils._utils import create_json, get_num_layers_vlm
+from QEfficient.utils.constants import QnnConstants
 from QEfficient.utils.device_utils import get_available_device_id
 from QEfficient.utils.run_utils import ApiRunnerInternVL, ApiRunnerVlm
 from QEfficient.utils.test_utils import InternProcessor
@@ -198,6 +200,8 @@ def check_image_text_to_text_pytorch_vs_kv_vs_ort_vs_ai100(
     n_layer: int = 1,
     kv_offload: bool = False,
     num_devices: int = 1,
+    enable_qnn: Optional[bool] = False,
+    qnn_config: Optional[str] = None,
 ):
     model_config = {"model_name": model_name}
     model_config["img_size"] = img_size
@@ -259,6 +263,8 @@ def check_image_text_to_text_pytorch_vs_kv_vs_ort_vs_ai100(
         prefill_seq_len=prompt_len,
         ctx_len=ctx_len,
         mxfp6=False,
+        enable_qnn=enable_qnn,
+        qnn_config=qnn_config,
     )
     inputs = processor(images=image, text=prompt, return_tensors="pt")
     if "pixel_values" in inputs:
@@ -281,6 +287,8 @@ def check_intern_image_text_to_text_pytorch_vs_kv_vs_ort_vs_ai100(
     n_layer: int = 1,
     kv_offload: bool = False,
     num_devices: int = 1,
+    enable_qnn: Optional[bool] = False,
+    qnn_config: Optional[str] = None,
 ):
     model_config = {"model_name": model_name}
@@ -346,6 +354,8 @@ def check_intern_image_text_to_text_pytorch_vs_kv_vs_ort_vs_ai100(
         prefill_seq_len=prompt_len,
         ctx_len=ctx_len,
         mxfp6=False,
+        enable_qnn=enable_qnn,
+        qnn_config=qnn_config,
     )
     print("QPC Outputs (QAIC):")
     output = qeff_model.generate(inputs=inputs, generation_len=NEW_GENERATION_TOKENS, streamer=streamer)
@@ -381,6 +391,42 @@ def test_image_text_to_text_pytorch_vs_kv_vs_ort_vs_ai100(
     )


+@pytest.mark.on_qaic
+@pytest.mark.qnn
+@pytest.mark.multimodal
+@pytest.mark.parametrize(
+    "model_name, kv_offload, batch_size, prompt_len, ctx_len, img_size, img_url, query, n_layer", test_models_config
+)
+def test_image_text_to_text_pytorch_vs_kv_vs_ort_vs_ai100_qnn(
+    model_name, kv_offload, batch_size, prompt_len, ctx_len, img_size, img_url, query, n_layer
+):
+    """
+    Test function to validate the PyTorch model, the PyTorch model after KV changes, the ONNX model, and the Cloud AI 100 model compiled through the QNN flow, without continuous batching.
+    ``Mandatory`` Args:
+        :model_name (str): Hugging Face Model Card name, Example: ``gpt2``
+    """
+    if model_name == "meta-llama/Llama-4-Scout-17B-16E-Instruct":
+        pytest.skip("QNN is not supported for Llama 4 Scout models")
+
+    qnn_config_json_path = os.path.join(os.getcwd(), "qnn_config.json")
+    create_json(qnn_config_json_path, QnnConstants.QNN_SAMPLE_CONFIG)
+
+    check_image_text_to_text_pytorch_vs_kv_vs_ort_vs_ai100(
+        model_name=model_name,
+        prompt_len=prompt_len,
+        ctx_len=ctx_len,
+        max_gen_len=NEW_GENERATION_TOKENS,
+        img_size=img_size,
+        img_url=img_url,
+        query=query,
+        n_layer=n_layer,
+        batch_size=batch_size,
+        kv_offload=kv_offload,
+        enable_qnn=True,
+        qnn_config=qnn_config_json_path,
+    )
+
+
 @pytest.mark.on_qaic
 @pytest.mark.multimodal
 @pytest.mark.parametrize(
@@ -400,3 +446,30 @@ def test_image_text_to_text_intern_pytorch_vs_kv_vs_ort_vs_ai100(
         batch_size=batch_size,
         kv_offload=kv_offload,
     )
+
+
+@pytest.mark.on_qaic
+@pytest.mark.qnn
+@pytest.mark.multimodal
+@pytest.mark.parametrize(
+    "model_name, kv_offload, batch_size, prompt_len, ctx_len, img_url, query, n_layer", intern_model_config
+)
+def test_image_text_to_text_intern_pytorch_vs_kv_vs_ort_vs_ai100_qnn(
+    model_name, kv_offload, batch_size, prompt_len, ctx_len, img_url, query, n_layer
+):
+    qnn_config_json_path = os.path.join(os.getcwd(), "qnn_config.json")
+    create_json(qnn_config_json_path, QnnConstants.QNN_SAMPLE_CONFIG)
+
+    check_intern_image_text_to_text_pytorch_vs_kv_vs_ort_vs_ai100(
+        model_name=model_name,
+        prompt_len=prompt_len,
+        ctx_len=ctx_len,
+        max_gen_len=NEW_GENERATION_TOKENS,
+        img_url=img_url,
+        query=query,
+        n_layer=n_layer,
+        batch_size=batch_size,
+        kv_offload=kv_offload,
+        enable_qnn=True,
+        qnn_config=qnn_config_json_path,
+    )
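
Both new tests share the same setup: they write QnnConstants.QNN_SAMPLE_CONFIG out as qnn_config.json in the current working directory via create_json, then pass that path (together with enable_qnn=True) through the checker, which forwards both values to the compile step. A minimal standalone sketch of that setup, using only the helpers imported in this diff; running it outside pytest is an assumption for illustration, not part of the change:

import os

from QEfficient.utils._utils import create_json
from QEfficient.utils.constants import QnnConstants

# Materialize the sample QNN compiler config in the working directory,
# exactly as the two *_qnn tests above do before invoking the checkers.
qnn_config_json_path = os.path.join(os.getcwd(), "qnn_config.json")
create_json(qnn_config_json_path, QnnConstants.QNN_SAMPLE_CONFIG)
print("QNN config written to:", qnn_config_json_path)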
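Because the new tests carry pytest.mark.qnn alongside the existing on_qaic and multimodal markers, the QNN paths can be selected or excluded independently of the rest of the suite. A hedged usage sketch via pytest's programmatic entry point (marker names come from the decorators in this diff):

import pytest

# Run only tests tagged with both the "qnn" and "multimodal" markers;
# "-m" takes a standard pytest marker expression.
pytest.main(["-m", "qnn and multimodal"])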