Skip to content

Commit c53bebd

Browse files
removed platform sdk dependency (#609)
This PR updates QEff to support QPC generation on systems without the Platform SDK by refactoring the module loading behavior. Users can now compile models and generate QPCs using QEff with only the Apps SDK installed. Background: Previously, both the Apps SDK and the Platform SDK were required to compile and generate QPCs using QEff. The goal is to allow QPC generation with only the Apps SDK installed, for systems without Ultra cards. Changes: Refactored `QEfficient/__init__.py` to import the public API unconditionally instead of only when the QAIC SDK check passes, and refactored `generation/cloud_infer.py` to wrap the `qaicrt` and `QAicApi_pb2` (`aicapi`) imports in guarded `try`/`except ImportError` blocks. Platform SDK-dependent modules are therefore only required when a `QAICInferenceSession` is actually created (which raises an `ImportError` if they are missing), avoiding import errors during initialization and QPC generation. Signed-off-by: Sharvari Medhe <[email protected]> Co-authored-by: Hem Agnihotri <[email protected]>
1 parent 44636a6 commit c53bebd

File tree

2 files changed

+72
-59
lines changed

2 files changed

+72
-59
lines changed

QEfficient/__init__.py

Lines changed: 32 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -9,19 +9,49 @@
99
import warnings
1010

1111
import QEfficient.utils.model_registery # noqa: F401
12+
from QEfficient.base import (
13+
QEFFAutoModel,
14+
QEFFAutoModelForCausalLM,
15+
QEFFAutoModelForCTC,
16+
QEFFAutoModelForImageTextToText,
17+
QEFFAutoModelForSpeechSeq2Seq,
18+
QEFFCommonLoader,
19+
)
20+
from QEfficient.compile.compile_helper import compile
21+
from QEfficient.exporter.export_hf_to_cloud_ai_100 import qualcomm_efficient_converter
22+
from QEfficient.generation.text_generation_inference import cloud_ai_100_exec_kv
23+
from QEfficient.peft import QEffAutoPeftModelForCausalLM
24+
from QEfficient.transformers.transform import transform
1225
from QEfficient.utils import custom_format_warning
1326
from QEfficient.utils.logging_utils import logger
1427

28+
# Users can use QEfficient.export for exporting models to ONNX
29+
export = qualcomm_efficient_converter
30+
__all__ = [
31+
"transform",
32+
"export",
33+
"compile",
34+
"cloud_ai_100_exec_kv",
35+
"QEFFAutoModel",
36+
"QEFFAutoModelForCausalLM",
37+
"QEFFAutoModelForCTC",
38+
"QEffAutoPeftModelForCausalLM",
39+
"QEFFAutoModelForImageTextToText",
40+
"QEFFAutoModelForSpeechSeq2Seq",
41+
"QEFFCommonLoader",
42+
]
1543
# For faster downloads via hf_transfer
1644
# This code is put above import statements as this needs to be executed before
1745
# hf_transfer is imported (will happen on line 15 via leading imports)
1846
os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
1947
# Placeholder for all non-transformer models registered in QEfficient
2048

21-
2249
# custom warning for the better logging experience
2350
warnings.formatwarning = custom_format_warning
2451

52+
# Conditionally import QAIC-related modules if the SDK is installed
53+
__version__ = "0.0.1.dev0"
54+
2555

2656
def check_qaic_sdk():
2757
"""Check if QAIC SDK is installed"""
@@ -37,40 +67,5 @@ def check_qaic_sdk():
3767
return False
3868

3969

40-
# Conditionally import QAIC-related modules if the SDK is installed
41-
__version__ = "0.0.1.dev0"
42-
43-
if check_qaic_sdk():
44-
from QEfficient.base import (
45-
QEFFAutoModel,
46-
QEFFAutoModelForCausalLM,
47-
QEFFAutoModelForCTC,
48-
QEFFAutoModelForImageTextToText,
49-
QEFFAutoModelForSpeechSeq2Seq,
50-
QEFFCommonLoader,
51-
)
52-
from QEfficient.compile.compile_helper import compile
53-
from QEfficient.exporter.export_hf_to_cloud_ai_100 import qualcomm_efficient_converter
54-
from QEfficient.generation.text_generation_inference import cloud_ai_100_exec_kv
55-
from QEfficient.peft import QEffAutoPeftModelForCausalLM
56-
from QEfficient.transformers.transform import transform
57-
58-
# Users can use QEfficient.export for exporting models to ONNX
59-
export = qualcomm_efficient_converter
60-
61-
__all__ = [
62-
"transform",
63-
"export",
64-
"compile",
65-
"cloud_ai_100_exec_kv",
66-
"QEFFAutoModel",
67-
"QEFFAutoModelForCausalLM",
68-
"QEFFAutoModelForCTC",
69-
"QEffAutoPeftModelForCausalLM",
70-
"QEFFAutoModelForImageTextToText",
71-
"QEFFAutoModelForSpeechSeq2Seq",
72-
"QEFFCommonLoader",
73-
]
74-
75-
else:
70+
if not check_qaic_sdk():
7671
logger.warning("QAIC SDK is not installed, eager mode features won't be available!")

QEfficient/generation/cloud_infer.py

Lines changed: 40 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
#
66
# -----------------------------------------------------------------------------
77

8+
import platform
9+
import sys
810
from pathlib import Path
911
from typing import Dict, List, Optional, Union
1012
from warnings import warn
@@ -13,32 +15,29 @@
1315

1416
try:
1517
import qaicrt
18+
19+
is_qaicrt_imported = True
1620
except ImportError:
17-
import platform
18-
import sys
21+
try:
22+
sys.path.append(f"/opt/qti-aic/dev/lib/{platform.machine()}")
23+
import qaicrt
1924

20-
sys.path.append(f"/opt/qti-aic/dev/lib/{platform.machine()}")
21-
import qaicrt
25+
is_qaicrt_imported = True
26+
except ImportError:
27+
is_qaicrt_imported = False
2228

2329
try:
2430
import QAicApi_pb2 as aicapi
25-
except ImportError:
26-
import sys
2731

28-
sys.path.append("/opt/qti-aic/dev/python")
29-
import QAicApi_pb2 as aicapi
32+
is_aicapi_imported = True
33+
except ImportError:
34+
try:
35+
sys.path.append("/opt/qti-aic/dev/python")
36+
import QAicApi_pb2 as aicapi
3037

31-
aic_to_np_dtype_mapping = {
32-
aicapi.FLOAT_TYPE: np.dtype(np.float32),
33-
aicapi.FLOAT_16_TYPE: np.dtype(np.float16),
34-
aicapi.INT8_Q_TYPE: np.dtype(np.int8),
35-
aicapi.UINT8_Q_TYPE: np.dtype(np.uint8),
36-
aicapi.INT16_Q_TYPE: np.dtype(np.int16),
37-
aicapi.INT32_Q_TYPE: np.dtype(np.int32),
38-
aicapi.INT32_I_TYPE: np.dtype(np.int32),
39-
aicapi.INT64_I_TYPE: np.dtype(np.int64),
40-
aicapi.INT8_TYPE: np.dtype(np.int8),
41-
}
38+
is_aicapi_imported = True
39+
except ImportError:
40+
is_qaicrt_imported = False
4241

4342

4443
class QAICInferenceSession:
@@ -58,6 +57,25 @@ def __init__(
5857
:activate: bool. If false, activation will be disabled. Default=True.
5958
:enable_debug_logs: bool. If True, It will enable debug logs. Default=False.
6059
"""
60+
if not (is_qaicrt_imported and is_aicapi_imported):
61+
raise ImportError(
62+
"Unable to import `qaicrt` and/or `QAicApi_pb2` libraries required for executing QPC files on the CLOUD AI platform.\n"
63+
"Please ensure that the QAIC platform SDK and apps SDK are installed correctly."
64+
)
65+
66+
# Build dtype mapping once (depends on aicapi constants)
67+
self.aic_to_np_dtype_mapping = {
68+
aicapi.FLOAT_TYPE: np.dtype(np.float32),
69+
aicapi.FLOAT_16_TYPE: np.dtype(np.float16),
70+
aicapi.INT8_Q_TYPE: np.dtype(np.int8),
71+
aicapi.UINT8_Q_TYPE: np.dtype(np.uint8),
72+
aicapi.INT16_Q_TYPE: np.dtype(np.int16),
73+
aicapi.INT32_Q_TYPE: np.dtype(np.int32),
74+
aicapi.INT32_I_TYPE: np.dtype(np.int32),
75+
aicapi.INT64_I_TYPE: np.dtype(np.int64),
76+
aicapi.INT8_TYPE: np.dtype(np.int8),
77+
}
78+
6179
# Load QPC
6280
if device_ids is not None:
6381
devices = qaicrt.QIDList(device_ids)
@@ -77,7 +95,7 @@ def __init__(
7795
raise RuntimeError("Failed to getIoDescriptor")
7896
iodesc.ParseFromString(bytes(iodesc_data))
7997
self.allowed_shapes = [
80-
[(aic_to_np_dtype_mapping[x.type].itemsize, list(x.dims)) for x in allowed_shape.shapes]
98+
[(self.aic_to_np_dtype_mapping[x.type].itemsize, list(x.dims)) for x in allowed_shape.shapes]
8199
for allowed_shape in iodesc.allowed_shapes
82100
]
83101
self.bindings = iodesc.selected_set.bindings
@@ -97,7 +115,7 @@ def __init__(
97115
# Create input qbuffers and buf_dims
98116
self.qbuffers = [qaicrt.QBuffer(bytes(binding.size)) for binding in self.bindings]
99117
self.buf_dims = qaicrt.BufferDimensionsVecRef(
100-
[(aic_to_np_dtype_mapping[binding.type].itemsize, list(binding.dims)) for binding in self.bindings]
118+
[(self.aic_to_np_dtype_mapping[binding.type].itemsize, list(binding.dims)) for binding in self.bindings]
101119
)
102120

103121
@property
@@ -205,6 +223,6 @@ def run(self, inputs: Dict[str, np.ndarray]) -> Dict[str, np.ndarray]:
205223
continue
206224
outputs[output_name] = np.frombuffer(
207225
bytes(output_qbuffers[buffer_index]),
208-
aic_to_np_dtype_mapping[self.bindings[buffer_index].type],
226+
self.aic_to_np_dtype_mapping[self.bindings[buffer_index].type],
209227
).reshape(self.buf_dims[buffer_index][1])
210228
return outputs

0 commit comments

Comments
 (0)