@@ -125,6 +125,7 @@ def apply_quantization_config(
     :param run_compressed: Whether the model will be run in compressed mode or
         decompressed fully on load
     """
+    from compressed_tensors.linear.compressed_linear import CompressedLinear
 
     config = deepcopy(config)
     if config is None:  # see PR #180
@@ -148,7 +149,6 @@ def apply_quantization_config(
         # quant scheme to the matching layers
         matched_targets = match_targets(name, submodule, target_to_scheme)
         scheme = _scheme_from_targets(target_to_scheme, matched_targets, name)
-
         # target matched - add layer and scheme to target list
         submodule.quantization_scheme = scheme
 
@@ -159,8 +159,6 @@ def apply_quantization_config(
             and isinstance(submodule, torch.nn.Linear)
             and config.format != CompressionFormat.dense.value
         ):
-            from compressed_tensors.linear.compressed_linear import CompressedLinear
-
             # TODO: expand to more module types
             compressed_linear = CompressedLinear.from_linear(
                 submodule,
@@ -169,9 +167,6 @@ def apply_quantization_config(
             )
             replace_module(model, name, compressed_linear)
 
-            # target matched - add layer and scheme to target list
-            submodule.quantization_scheme = scheme
-
         # apply current quantization status to each targeted submodule
         apply_quantization_status(submodule, config.quantization_status)
 
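Net effect of the patch: the `CompressedLinear` import is hoisted from the `run_compressed` branch to the top of the function, and the duplicated `submodule.quantization_scheme = scheme` assignment after `replace_module` is dropped, so each matched submodule gets its scheme set exactly once. A minimal sketch of how this code path is exercised follows; the toy model and every config field value below are illustrative assumptions, not taken from this PR:

```python
import torch

from compressed_tensors.linear.compressed_linear import CompressedLinear
from compressed_tensors.quantization import QuantizationConfig, apply_quantization_config

# Toy model; any torch.nn.Linear matched by the config is eligible for replacement.
model = torch.nn.Sequential(torch.nn.Linear(256, 256))

# Hypothetical 4-bit weight-only config; values chosen for illustration only.
config = QuantizationConfig(
    format="pack-quantized",  # any non-dense format enables the CompressedLinear path
    config_groups={
        "group_0": {
            "targets": ["Linear"],
            "weights": {"num_bits": 4, "type": "int", "symmetric": True, "strategy": "channel"},
        }
    },
)

# With run_compressed=True, each matched Linear is swapped for a CompressedLinear
# via CompressedLinear.from_linear and replace_module, as in the diff above.
apply_quantization_config(model, config, run_compressed=True)
assert isinstance(model[0], CompressedLinear)
```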