Skip to content

Commit 98a97e5

Browse files
ALL_QPARAM_NAMES
Signed-off-by: Brian Dellabetta <[email protected]>
1 parent 5e5ffb5 commit 98a97e5

File tree

3 files changed

+58
-27
lines changed

3 files changed

+58
-27
lines changed

src/compressed_tensors/quantization/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,5 +17,6 @@
1717

1818
from .quant_args import *
1919
from .quant_config import *
20+
from .quant_names import *
2021
from .quant_scheme import *
2122
from .lifecycle import *

src/compressed_tensors/quantization/lifecycle/initialize.py

Lines changed: 9 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -16,21 +16,22 @@
1616
import logging
1717
import math
1818
import warnings
19-
from enum import Enum
2019
from typing import Optional
2120

2221
import torch
23-
from compressed_tensors.quantization.lifecycle.forward import (
24-
wrap_module_forward_quantized,
25-
)
26-
from compressed_tensors.quantization.quant_args import (
22+
from compressed_tensors.quantization import (
23+
ALL_QPARAM_NAMES,
2724
FP8_E4M3_DATA,
2825
ActivationOrdering,
26+
KVCacheScaleType,
2927
QuantizationArgs,
28+
QuantizationScheme,
29+
QuantizationStatus,
3030
QuantizationStrategy,
3131
)
32-
from compressed_tensors.quantization.quant_config import QuantizationStatus
33-
from compressed_tensors.quantization.quant_scheme import QuantizationScheme
32+
from compressed_tensors.quantization.lifecycle.forward import (
33+
wrap_module_forward_quantized,
34+
)
3435
from compressed_tensors.quantization.utils import is_fp4, is_kv_cache_quant_scheme
3536
from compressed_tensors.utils import (
3637
delete_offload_parameter,
@@ -44,31 +45,12 @@
4445
__all__ = [
4546
"initialize_module_for_quantization",
4647
"is_attention_module",
47-
"KVCacheScaleType",
48-
"ALL_QPARAM_KEYS",
4948
]
5049

5150

5251
_LOGGER = logging.getLogger(__name__)
5352

5453

55-
class KVCacheScaleType(Enum):
56-
KEY = "k_scale"
57-
VALUE = "v_scale"
58-
59-
60-
ALL_QPARAM_KEYS = [KVCacheScaleType.KEY.value, KVCacheScaleType.VALUE.value] + [
61-
f"{base_name}_{suffix}"
62-
for base_name in ("input", "weight", "output")
63-
for suffix in (
64-
"global_scale",
65-
"scale",
66-
"zero_point",
67-
"g_idx",
68-
)
69-
]
70-
71-
7254
def initialize_module_for_quantization(
7355
module: Module,
7456
scheme: Optional[QuantizationScheme] = None,
@@ -159,7 +141,7 @@ def _clear_all_qparams(
159141
160142
:param module: module to clear qparams from
161143
"""
162-
for key in ALL_QPARAM_KEYS:
144+
for key in ALL_QPARAM_NAMES:
163145
if hasattr(module, key):
164146
delete_offload_parameter(module, key)
165147

src/compressed_tensors/quantization/quant_names.py

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing,
10+
# software distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
from enum import Enum
16+
17+
__all__ = ["ALL_QPARAM_NAMES", "KVCacheScaleType"]
18+
19+
20+
class KVCacheScaleType(Enum):
21+
KEY = "k_scale"
22+
VALUE = "v_scale"
23+
24+
25+
ALL_QPARAM_NAMES = (
26+
[KVCacheScaleType.KEY.value, KVCacheScaleType.VALUE.value]
27+
+ [
28+
f"{base_name}_{suffix}"
29+
for base_name in ("input", "weight", "output")
30+
for suffix in (
31+
"global_scale",
32+
"scale",
33+
"zero_point",
34+
"g_idx",
35+
)
36+
]
37+
+ [
38+
"weight_packed",
39+
"weight_global_scale",
40+
"weight_shape",
41+
"scale_packed",
42+
"meta",
43+
"shape",
44+
"compressed",
45+
"bitmask",
46+
"row_offsets",
47+
]
48+
)

0 commit comments

Comments (0)