Skip to content

Commit cd58360

Browse files
committed
support transformers>=4.51
Signed-off-by: xin3he <[email protected]>
1 parent e9bd2e7 commit cd58360

File tree

2 files changed

+5
-2
lines changed

2 files changed

+5
-2
lines changed

neural_compressor/torch/algorithms/weight_only/save_load.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
import tempfile
2222

2323
import torch
24+
import transformers
2425

2526
from neural_compressor.common.utils import AWQ, TEQ, save_config_mapping
2627
from neural_compressor.torch.utils import (
@@ -846,7 +847,8 @@ def _init_hf_model(self, model_class, config):
846847

847848
dtype_orig = model_class._set_default_torch_dtype(torch_dtype)
848849

849-
init_contexts = [no_init_weights(_enable=_fast_init)]
850+
init_contexts = [no_init_weights(_enable=_fast_init)] if transformers.__version__ < "4.51" else\
851+
[no_init_weights()]
850852
init_contexts.append(init_empty_weights())
851853

852854
with ContextManagers(init_contexts):

neural_compressor/transformers/models/modeling_auto.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -678,7 +678,8 @@ def load_low_bit(cls, pretrained_model_name_or_path, *model_args, **kwargs):
678678
quantization_config.weight_dtype = "int4"
679679
logger.warning("int4 weight_dtype is used, please change the config.json if you don't want to use it.")
680680

681-
init_contexts = [no_init_weights(_enable=_fast_init)]
681+
init_contexts = [no_init_weights(_enable=_fast_init)] if transformers.__version__ < "4.51" else\
682+
[no_init_weights()]
682683
init_contexts.append(init_empty_weights())
683684

684685
with ContextManagers(init_contexts):

0 commit comments

Comments
 (0)