Commit 01af659

QuantizationMetadata class
Signed-off-by: Brian Dellabetta <[email protected]>
1 parent 7d8c5a4 commit 01af659

4 files changed, +65 -59 lines changed

src/compressed_tensors/quantization/__init__.py

Lines changed: 1 addition & 1 deletion
@@ -17,6 +17,6 @@
 
 from .quant_args import *
 from .quant_config import *
-from .quant_names import *
+from .quant_metadata import *
 from .quant_scheme import *
 from .lifecycle import *
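
A sketch, not part of the diff, of the package-level surface after this change (assuming compressed-tensors is installed at this commit): the star import re-exports the new class in place of the removed name list.

# Sketch: the removed ALL_QPARAM_NAMES constant is now produced on demand.
from compressed_tensors.quantization import QuantizationMetadata

print(QuantizationMetadata.all_qparam_names()[:2])  # ['k_scale', 'v_scale']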

src/compressed_tensors/quantization/lifecycle/initialize.py

Lines changed: 2 additions & 15 deletions
@@ -20,11 +20,11 @@
 
 import torch
 from compressed_tensors.quantization import (
-    ALL_QPARAM_NAMES,
     FP8_E4M3_DATA,
     ActivationOrdering,
     KVCacheScaleType,
     QuantizationArgs,
+    QuantizationMetadata,
     QuantizationScheme,
     QuantizationStatus,
     QuantizationStrategy,

@@ -76,7 +76,7 @@ def initialize_module_for_quantization(
         # no scheme passed and layer not targeted for quantization - skip
         return
 
-    _clear_all_qparams(module)
+    QuantizationMetadata.clear_all_qparams(module)
 
     if is_attention_module(module):
         # quantized actions based on calltime status

@@ -133,19 +133,6 @@ def is_attention_module(module: Module):
     )
 
 
-def _clear_all_qparams(
-    module: Module,
-):
-    """
-    Clear all previously registered quantization parameters from module
-
-    :param module: module to clear qparams from
-    """
-    for key in ALL_QPARAM_NAMES:
-        if hasattr(module, key):
-            delete_offload_parameter(module, key)
-
-
 def _initialize_scale_zero_point(
     module: Module,
     base_name: str,

src/compressed_tensors/quantization/quant_metadata.py

Lines changed: 62 additions & 0 deletions

@@ -0,0 +1,62 @@
+# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from enum import Enum
+
+from compressed_tensors.utils import delete_offload_parameter
+from torch.nn import Module
+
+
+__all__ = ["QuantizationMetadata", "KVCacheScaleType"]
+
+
+class KVCacheScaleType(Enum):
+    KEY = "k_scale"
+    VALUE = "v_scale"
+
+
+class QuantizationMetadata:
+    """
+    Container class for metadata related to quantization
+    """
+
+    @staticmethod
+    def all_qparam_names():
+        """
+        All quantization parameter names that might be registered
+        onto a module during lifecycle (excluding serialized parameters)
+        """
+        return [KVCacheScaleType.KEY.value, KVCacheScaleType.VALUE.value] + [
+            f"{base_name}_{suffix}"
+            for base_name in ("input", "weight", "output")
+            for suffix in (
+                "global_scale",
+                "scale",
+                "zero_point",
+                "g_idx",
+            )
+        ]
+
+    @classmethod
+    def clear_all_qparams(cls, module: Module):
+        """
+        Remove all parameters related to quantization that might have
+        been registered onto a module previously in lifecycle (excluding
+        serialized parameters)
+
+        :param module: Module to clear
+        """
+        for key in cls.all_qparam_names():
+            if hasattr(module, key):
+                delete_offload_parameter(module, key)
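
For reference, a minimal sketch (not part of the commit) exercising the new class on a plain torch module; the hand-registered `weight_scale` parameter and the `Linear(16, 16)` layer are purely illustrative stand-ins for qparams added earlier in the quantization lifecycle.

# Sketch: clear_all_qparams removes any registered qparam it recognizes.
import torch
from torch.nn import Linear

from compressed_tensors.quantization import QuantizationMetadata

linear = Linear(16, 16)
# Simulate a qparam registered earlier in the lifecycle
linear.register_parameter(
    "weight_scale", torch.nn.Parameter(torch.ones(1), requires_grad=False)
)

assert "weight_scale" in QuantizationMetadata.all_qparam_names()
QuantizationMetadata.clear_all_qparams(linear)
assert not hasattr(linear, "weight_scale")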

src/compressed_tensors/quantization/quant_names.py

Lines changed: 0 additions & 43 deletions
This file was deleted.
