Skip to content

Commit 98a97e5

Browse files
ALL_QPARAM_NAMES
Signed-off-by: Brian Dellabetta <[email protected]>
1 parent 5e5ffb5 commit 98a97e5

File tree

3 files changed

+58
-27
lines changed

3 files changed

+58
-27
lines changed

src/compressed_tensors/quantization/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,5 +17,6 @@
1717

1818
from .quant_args import *
1919
from .quant_config import *
20+
from .quant_names import *
2021
from .quant_scheme import *
2122
from .lifecycle import *

src/compressed_tensors/quantization/lifecycle/initialize.py

Lines changed: 9 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -16,21 +16,22 @@
1616
import logging
1717
import math
1818
import warnings
19-
from enum import Enum
2019
from typing import Optional
2120

2221
import torch
23-
from compressed_tensors.quantization.lifecycle.forward import (
24-
wrap_module_forward_quantized,
25-
)
26-
from compressed_tensors.quantization.quant_args import (
22+
from compressed_tensors.quantization import (
23+
ALL_QPARAM_NAMES,
2724
FP8_E4M3_DATA,
2825
ActivationOrdering,
26+
KVCacheScaleType,
2927
QuantizationArgs,
28+
QuantizationScheme,
29+
QuantizationStatus,
3030
QuantizationStrategy,
3131
)
32-
from compressed_tensors.quantization.quant_config import QuantizationStatus
33-
from compressed_tensors.quantization.quant_scheme import QuantizationScheme
32+
from compressed_tensors.quantization.lifecycle.forward import (
33+
wrap_module_forward_quantized,
34+
)
3435
from compressed_tensors.quantization.utils import is_fp4, is_kv_cache_quant_scheme
3536
from compressed_tensors.utils import (
3637
delete_offload_parameter,
@@ -44,31 +45,12 @@
4445
__all__ = [
4546
"initialize_module_for_quantization",
4647
"is_attention_module",
47-
"KVCacheScaleType",
48-
"ALL_QPARAM_KEYS",
4948
]
5049

5150

5251
_LOGGER = logging.getLogger(__name__)
5352

5453

55-
class KVCacheScaleType(Enum):
56-
KEY = "k_scale"
57-
VALUE = "v_scale"
58-
59-
60-
ALL_QPARAM_KEYS = [KVCacheScaleType.KEY.value, KVCacheScaleType.VALUE.value] + [
61-
f"{base_name}_{suffix}"
62-
for base_name in ("input", "weight", "output")
63-
for suffix in (
64-
"global_scale",
65-
"scale",
66-
"zero_point",
67-
"g_idx",
68-
)
69-
]
70-
71-
7254
def initialize_module_for_quantization(
7355
module: Module,
7456
scheme: Optional[QuantizationScheme] = None,
@@ -159,7 +141,7 @@ def _clear_all_qparams(
159141
160142
:param module: module to clear qparams from
161143
"""
162-
for key in ALL_QPARAM_KEYS:
144+
for key in ALL_QPARAM_NAMES:
163145
if hasattr(module, key):
164146
delete_offload_parameter(module, key)
165147

src/compressed_tensors/quantization/quant_names.py

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing,
10+
# software distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
from enum import Enum
16+
17+
__all__ = ["ALL_QPARAM_NAMES", "KVCacheScaleType"]
18+
19+
20+
class KVCacheScaleType(Enum):
21+
KEY = "k_scale"
22+
VALUE = "v_scale"
23+
24+
25+
ALL_QPARAM_NAMES = (
26+
[KVCacheScaleType.KEY.value, KVCacheScaleType.VALUE.value]
27+
+ [
28+
f"{base_name}_{suffix}"
29+
for base_name in ("input", "weight", "output")
30+
for suffix in (
31+
"global_scale",
32+
"scale",
33+
"zero_point",
34+
"g_idx",
35+
)
36+
]
37+
+ [
38+
"weight_packed",
39+
"weight_global_scale",
40+
"weight_shape",
41+
"scale_packed",
42+
"meta",
43+
"shape",
44+
"compressed",
45+
"bitmask",
46+
"row_offsets",
47+
]
48+
)

0 commit comments

Comments (0)