@@ -13,12 +13,11 @@
 # limitations under the License.

 import logging
-import re
-from collections import OrderedDict, defaultdict
+from collections import OrderedDict
 from copy import deepcopy
 from typing import Dict, Iterable, List, Optional
 from typing import OrderedDict as OrderedDictType
-from typing import Set, Union
+from typing import Union

 import torch
 from compressed_tensors.config import CompressionFormat
@@ -39,7 +38,12 @@
     infer_quantization_status,
     is_kv_cache_quant_scheme,
 )
-from compressed_tensors.utils.helpers import fix_fsdp_module_name, replace_module
+from compressed_tensors.utils.helpers import (
+    fix_fsdp_module_name,
+    deprecated,
+    replace_module,
+)
+from compressed_tensors.utils.match import match_named_modules, match_targets
 from compressed_tensors.utils.offload import update_parameter_data
 from compressed_tensors.utils.safetensors_load import get_safetensors_folder
 from safetensors import safe_open
@@ -51,8 +55,6 @@
     "apply_quantization_config",
     "apply_quantization_status",
     "find_name_or_class_matches",
-    "expand_target_names",
-    "is_target",
 ]

 from compressed_tensors.quantization.utils.helpers import is_module_quantized
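
Reviewer note: the import changes above pull in the shared `match_named_modules` helper that the rewritten loop below relies on. A minimal sketch of how that helper is expected to behave, assuming it yields `(name, module)` pairs for modules that hit a target and miss the ignore list; only the `warn_on_fail=True` keyword is taken from this diff, while the toy model and the `"Linear"` / `"2"` target and ignore strings are illustrative, not from this PR:

```python
# Sketch only: exercises match_named_modules the way the rewritten loop does.
# The toy model and the "Linear" / "2" target/ignore strings are hypothetical.
import torch
from compressed_tensors.utils.match import match_named_modules

model = torch.nn.Sequential(
    torch.nn.Linear(8, 8),  # named "0"
    torch.nn.ReLU(),        # named "1"
    torch.nn.Linear(8, 4),  # named "2"
)

# Expect the Linear named "0" to match; "1" is not a Linear and "2" is ignored.
for name, submodule in match_named_modules(model, ["Linear"], ["2"], warn_on_fail=True):
    print(name, type(submodule).__name__)
```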
@@ -144,31 +146,24 @@ def apply_quantization_config(
         for target in scheme.targets:
             target_to_scheme[target] = scheme

-    if run_compressed:
-        from compressed_tensors.linear.compressed_linear import CompressedLinear
-
-    # list of submodules to ignore
-    ignored_submodules = defaultdict(list)
-    # mark appropriate layers for quantization by setting their quantization schemes
-    for name, submodule in model.named_modules():
-        # potentially fix module name to remove FSDP wrapper prefix
-        name = fix_fsdp_module_name(name)
-        if matches := find_name_or_class_matches(name, submodule, config.ignore):
-            for match in matches:
-                ignored_submodules[match].append(name)
-            continue  # layer matches ignore list, continue
-
-        targets = find_name_or_class_matches(name, submodule, target_to_scheme)
+        # mark appropriate layers for quantization by setting their quantization schemes
+        for name, submodule in match_named_modules(
+            model, scheme.targets, config.ignore, warn_on_fail=True
+        ):
+            # potentially fix module name to remove FSDP wrapper prefix
+            name = fix_fsdp_module_name(name)

-        if targets:
             # mark modules to be quantized by adding
             # quant scheme to the matching layers
-            scheme = _scheme_from_targets(target_to_scheme, targets, name)
+            scheme = _scheme_from_targets(target_to_scheme, scheme.targets, name)
             if run_compressed:
                 format = config.format
                 if format != CompressionFormat.dense.value:
                     if isinstance(submodule, torch.nn.Linear):
-                        # TODO: expand to more module types
+                        from compressed_tensors.linear.compressed_linear import (
+                            CompressedLinear,
+                        )
+
                         compressed_linear = CompressedLinear.from_linear(
                             submodule,
                             quantization_scheme=scheme,
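
For context on the `run_compressed` branch in the hunk above: a layer is only swapped for `CompressedLinear` when the config's serialized format is not dense and the module is a plain `torch.nn.Linear`. A small sketch of that gate, where `should_swap_to_compressed` and the `"pack-quantized"` format string are hypothetical stand-ins used only for illustration:

```python
# Sketch of the gating logic only; the actual swap is done by
# CompressedLinear.from_linear(...) and replace_module(...) as in the hunk above.
import torch
from compressed_tensors.config import CompressionFormat


def should_swap_to_compressed(submodule: torch.nn.Module, fmt: str) -> bool:
    # hypothetical helper mirroring the isinstance / format checks above
    return fmt != CompressionFormat.dense.value and isinstance(submodule, torch.nn.Linear)


print(should_swap_to_compressed(torch.nn.Linear(4, 4), "dense"))           # False
print(should_swap_to_compressed(torch.nn.Linear(4, 4), "pack-quantized"))  # True
print(should_swap_to_compressed(torch.nn.ReLU(), "pack-quantized"))        # False
```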
@@ -181,16 +176,18 @@ def apply_quantization_config(

             names_to_scheme[name] = submodule.quantization_scheme

-    if config.ignore is not None and ignored_submodules is not None:
-        if set(config.ignore) - set(ignored_submodules):
-            _LOGGER.warning(
-                "Some layers that were to be ignored were "
-                "not found in the model: "
-                f"{set(config.ignore) - set(ignored_submodules)}"
-            )
+            # apply current quantization status to each targeted submodule
+            apply_quantization_status(submodule, config.quantization_status)
+
+    # TODO warn on ignore not being found, this is useful in debugging
+    # if config.ignore is not None and ignored_submodules is not None:
+    #     if set(config.ignore) - set(ignored_submodules):
+    #         _LOGGER.warning(
+    #             "Some layers that were to be ignored were "
+    #             "not found in the model: "
+    #             f"{set(config.ignore) - set(ignored_submodules)}"
+    #         )

-    # apply current quantization status across all targeted layers
-    apply_quantization_status(model, config.quantization_status)
     return names_to_scheme
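
End-to-end, the rewritten function still walks each config group, attaches a `quantization_scheme` to every matched submodule, applies the config's quantization status per submodule, and returns the name-to-scheme mapping. A rough usage sketch, assuming the library's usual `QuantizationConfig` / `QuantizationScheme` / `QuantizationArgs` constructors; the tiny model, the 8-bit weight scheme, and the ignore entry are illustrative only:

```python
# Illustrative only: constructor keywords are assumed from the library's config
# classes, not from this diff.
import torch
from compressed_tensors.quantization import (
    QuantizationArgs,
    QuantizationConfig,
    QuantizationScheme,
    apply_quantization_config,
)

model = torch.nn.Sequential(torch.nn.Linear(16, 16), torch.nn.Linear(16, 4))

config = QuantizationConfig(
    config_groups={
        "group_0": QuantizationScheme(
            targets=["Linear"],  # resolved through match_named_modules
            weights=QuantizationArgs(num_bits=8, symmetric=True),
        )
    },
    ignore=["1"],  # hypothetical: skip the second Linear by module name
)

# run_compressed=False keeps dense Linear modules; each matched submodule gets a
# quantization_scheme and the config's quantization status applied to it.
names_to_scheme = apply_quantization_config(model, config, run_compressed=False)
print(names_to_scheme)
```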