@@ -2279,11 +2279,11 @@ def get_act_max_hook(module, input, output):
    #     # Apply the configuration to the corresponding layer in the model
    #     for key in keys:
    #         setattr(m, key, low_config[key])
-
+
    #     return layer_config

    @torch.inference_mode()
-    def check_needs_auto_gguf_mix_mse(self, block, formats, input_ids, input_others, outputs, device,cache_device):
+    def check_needs_auto_gguf_mix_mse(self, block, formats, input_ids, input_others, outputs, device, cache_device):
        ## TODO Q4_K_M does not support iters==0
        ## TODO for MoE models, experts use the default bits
        mse_reduction = "mean"
@@ -2293,35 +2293,35 @@ def check_needs_auto_gguf_mix_mse(self, block, formats, input_ids, input_others,

        target_gguf_format = None
        for format in formats:
-            if format.startswith("gguf") and 'm' in format:
+            if format.startswith("gguf") and "m" in format:
                target_gguf_format = format
        if target_gguf_format is None:
            return

        ## simple verification: if the layer_config already has a mixed-bits setting, we don't apply auto mixed precision
-        bits = []
-        count = 0
+        bits = []
+        count = 0
        quant_bits = {}
-        for n, m in block.named_modules():# [4 4 6 4 4 6 8]
+        for n, m in block.named_modules():  # [4 4 6 4 4 6 8]
            if hasattr(m, "bits"):
                bits.append(m.bits)
-                quant_bits[m.bits]= 0
+                quant_bits[m.bits] = 0
        ori_bit = min(bits)
        for b in bits:
            if b != ori_bit:
-                quant_bits[b]+= 1
-        bits = set(bits) # {4,6}
+                quant_bits[b] += 1
+        bits = set(bits)  # {4, 6}
        if len(bits) <= 1:
            return
        del quant_bits[min(bits)]
-
+
        layer_names = []
-
+
        for n, m in block.named_modules():
            if check_to_quantized(m):
                layer_names.append(n)
-                count += 1
-
+                count += 1
+
        if count > 10:
            logger.info("Skip selection")
            return
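For reference, the hunk above only triggers mixed-bit handling when the block's layers carry more than one `bits` value. A minimal standalone sketch of that counting step, with hypothetical helper names that are not part of this patch:

```python
# Sketch of the bit-counting logic above (illustrative only): count how many
# layers sit above the minimum bit width and bail out when nothing is mixed.
from collections import Counter
from typing import Dict, List, Optional


def count_mixed_bits(layer_bits: List[int]) -> Optional[Dict[int, int]]:
    if not layer_bits:
        return None
    counts = Counter(layer_bits)  # e.g. [4, 4, 6, 4, 4, 6, 8] -> {4: 4, 6: 2, 8: 1}
    if len(counts) <= 1:  # a single bit width means there is nothing to mix
        return None
    del counts[min(counts)]  # keep only the bit widths above the base one
    return dict(counts)


print(count_mixed_bits([4, 4, 6, 4, 4, 6, 8]))  # {6: 2, 8: 1}
print(count_mixed_bits([4, 4, 4]))  # None
```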
@@ -2334,17 +2334,17 @@ def check_needs_auto_gguf_mix_mse(self, block, formats, input_ids, input_others,
        # current_output = to_device(current_output, device)
        current_input_ids = [input_ids[i] for i in whole_indices]
        default_config = GGUF_CONFIG[target_gguf_format]
-        split_list = re.split(':|_', target_gguf_format)
+        split_list = re.split(":|_", target_gguf_format)
        mix_configs = {}
-
-        for k,_ in quant_bits.items():
+
+        for k, _ in quant_bits.items():
            mix_configs[k] = GGUF_CONFIG[f"gguf:q{k}_{split_list[2]}"]
-
+
        d_format = [f"gguf:q{min(bits)}_{split_list[2]}"]
        low_config = GGUF_CONFIG[f"gguf:q{min(bits)}_{split_list[2]}"]

        default_layer_config = low_config
-
+
        # for k in self.layer_config.keys():
        #     s = re.split('\.', k)
        #     if len(s) < 2:
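As a quick sanity check on the `re.split(":|_", ...)` call above: splitting the target GGUF format name on `:` or `_` exposes the suffix piece used to build the per-bit config keys. The format string below is an assumed example value, not taken from the patch:

```python
# Quick check of the format-name parsing above (example value assumed).
import re

target_gguf_format = "gguf:q4_k_m"
split_list = re.split(":|_", target_gguf_format)
print(split_list)  # ['gguf', 'q4', 'k', 'm']
# split_list[2] supplies the suffix used when looking up per-bit configs:
print(f"gguf:q{6}_{split_list[2]}")  # 'gguf:q6_k'
```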
@@ -2355,73 +2355,135 @@ def check_needs_auto_gguf_mix_mse(self, block, formats, input_ids, input_others,

        if len(bits) == 2:
            logger.info("Quantize with a single mixed bit width")
-            self.choose_one_bit(block,mix_configs,quant_bits,default_config,default_layer_config,layer_names,current_input_ids,input_others,current_output,mse_loss,device,cache_device)
+            self.choose_one_bit(
+                block,
+                mix_configs,
+                quant_bits,
+                default_config,
+                default_layer_config,
+                layer_names,
+                current_input_ids,
+                input_others,
+                current_output,
+                mse_loss,
+                device,
+                cache_device,
+            )
        else:
            logger.info("Quantize with multiple mixed bit widths")
-            self.choose_various_bit(block,mix_configs,quant_bits,default_config,default_layer_config,layer_names,current_input_ids,input_others,current_output,mse_loss,device,cache_device)
-
+            self.choose_various_bit(
+                block,
+                mix_configs,
+                quant_bits,
+                default_config,
+                default_layer_config,
+                layer_names,
+                current_input_ids,
+                input_others,
+                current_output,
+                mse_loss,
+                device,
+                cache_device,
+            )

-    def choose_one_bit(self,block,mix_configs,quant_bits,default_config,default_layer_config,layer_names,current_input_ids,input_others,current_output,mse_loss,device,cache_device):
+    def choose_one_bit(
+        self,
+        block,
+        mix_configs,
+        quant_bits,
+        default_config,
+        default_layer_config,
+        layer_names,
+        current_input_ids,
+        input_others,
+        current_output,
+        mse_loss,
+        device,
+        cache_device,
+    ):
        each_loss = {}
        # bit = mix_configs.keys()[0]
-        [(_,cur_config)] = mix_configs.items()
-        [(_,num_bit)] = quant_bits.items()
+        [(_, cur_config)] = mix_configs.items()
+        [(_, num_bit)] = quant_bits.items()
        for layer_name in layer_names:
            module = get_module(block, layer_name)
            self.layer_config[module.tmp_name] = default_config
-            for key in cur_config:
-                setattr(module,key,cur_config[key])
-
-            wrapper_layer = WrapperLinear(module,enable_minmax_tuning=False,enable_round_tuning=False,enable_norm_bias_tuning=False,device=device)
+            for key in cur_config:
+                setattr(module, key, cur_config[key])
+
+            wrapper_layer = WrapperLinear(
+                module,
+                enable_minmax_tuning=False,
+                enable_round_tuning=False,
+                enable_norm_bias_tuning=False,
+                device=device,
+            )
            set_module(block, layer_name, wrapper_layer)
-            q_output = self.get_block_outputs(block, current_input_ids, input_others, self.batch_size * self.infer_bs_coeff,
-                                              device,
-                                              cache_device)
-
-            set_module(block,layer_name,wrapper_layer.orig_layer)
+            q_output = self.get_block_outputs(
+                block, current_input_ids, input_others, self.batch_size * self.infer_bs_coeff, device, cache_device
+            )
+
+            set_module(block, layer_name, wrapper_layer.orig_layer)
            module = get_module(block, layer_name)
-            for key in default_config:
-                setattr(module,key,default_config[key])
-            cur_loss = mse_loss(torch.stack(q_output).squeeze(1),current_output)
-            each_loss[layer_name] = cur_loss # record the loss of each layer
-
+            for key in default_config:
+                setattr(module, key, default_config[key])
+            cur_loss = mse_loss(torch.stack(q_output).squeeze(1), current_output)
+            each_loss[layer_name] = cur_loss  # record the loss of each layer
+
        top_n_loss = sorted(each_loss.items(), key=lambda x: x[1], reverse=False)[:num_bit]
        # breakpoint()
        # tmp_list.append(max_loss[1])
        flag = {}
-        for layer_name,_ in top_n_loss:
+        for layer_name, _ in top_n_loss:
            module = get_module(block, layer_name)
-            for key in cur_config:
-                setattr(module,key,cur_config[key])
-
+            for key in cur_config:
+                setattr(module, key, cur_config[key])
+
            self.layer_config[module.tmp_name] = cur_config
-            # continue
+            # continue

-
-
-    def choose_various_bit(self,block,mix_configs,quant_bits,cur_config,default_config,default_layer_config,layer_names,current_input_ids,input_others,current_output,mse_loss,device,cache_device):
+    def choose_various_bit(
+        self,
+        block,
+        mix_configs,
+        quant_bits,
+        cur_config,
+        default_config,
+        default_layer_config,
+        layer_names,
+        current_input_ids,
+        input_others,
+        current_output,
+        mse_loss,
+        device,
+        cache_device,
+    ):
        each_loss = {}
        for layer_name in layer_names:
            module = get_module(block, layer_name)
-            for key in default_config:
-                setattr(module,key,cur_config[key])
-
-            wrapper_layer = WrapperLinear(module,enable_minmax_tuning=False,enable_round_tuning=False,enable_norm_bias_tuning=False,device=device)
+            for key in default_config:
+                setattr(module, key, cur_config[key])
+
+            wrapper_layer = WrapperLinear(
+                module,
+                enable_minmax_tuning=False,
+                enable_round_tuning=False,
+                enable_norm_bias_tuning=False,
+                device=device,
+            )
            set_module(block, layer_name, wrapper_layer)
-            q_output = self.get_block_outputs(block, current_input_ids, input_others, self.batch_size * self.infer_bs_coeff,
-                                              device,
-                                              cache_device)
-            set_module(block,layer_name,wrapper_layer.orig_layer)
-
-            cur_loss = mse_loss(torch.stack(q_output).squeeze(1),current_output)
-            each_loss[layer_name] = cur_loss # record the loss of each layer
-
-        top_n_loss = sorted(each_loss.items(), key=lambda x: x[1], reverse=True)[:sum(quant_bits.values())]
+            q_output = self.get_block_outputs(
+                block, current_input_ids, input_others, self.batch_size * self.infer_bs_coeff, device, cache_device
+            )
+            set_module(block, layer_name, wrapper_layer.orig_layer)
+
+            cur_loss = mse_loss(torch.stack(q_output).squeeze(1), current_output)
+            each_loss[layer_name] = cur_loss  # record the loss of each layer
+
+        top_n_loss = sorted(each_loss.items(), key=lambda x: x[1], reverse=True)[: sum(quant_bits.values())]
        shift = 0
-        for k,_ in top_n_loss.items():
+        for k, _ in top_n_loss:
            self.layer_config[module.tmp_name] = cur_config
-
-


    def quant_block(self, block, input_ids, input_others, q_input=None, device=torch.device("cpu")):
        """Quantize the weights of a given block of the model.
@@ -2476,7 +2538,8 @@ def quant_block(self, block, input_ids, input_others, q_input=None, device=torch
            handle.remove()

        self.check_needs_auto_gguf_mix_mse(
-            block, self.formats, input_ids, input_others, output, device, self.cache_device)
+            block, self.formats, input_ids, input_others, output, device, self.cache_device
+        )

        if q_input is not None:
            if input_ids is not q_input:
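Stepping back, `choose_one_bit` above quantizes one candidate layer at a time with the higher-bit config, measures the block-output MSE against the reference output, and then takes the first `num_bit` entries of the ascending sort (`reverse=False`). A simplified standalone sketch of that selection loop, using hypothetical callback names rather than the patch's actual API:

```python
# Simplified sketch of the per-layer MSE ranking used by choose_one_bit.
# run_block_with_layer_promoted is a hypothetical callback returning the block
# output with only `name` switched to the higher-bit config.
import torch


def select_layers_to_promote(layer_names, run_block_with_layer_promoted, reference_output, num_to_promote):
    mse = torch.nn.MSELoss(reduction="mean")
    losses = {}
    for name in layer_names:
        q_output = run_block_with_layer_promoted(name)
        losses[name] = mse(q_output, reference_output).item()
    # ascending loss, mirroring reverse=False in choose_one_bit
    ranked = sorted(losses.items(), key=lambda kv: kv[1])
    return [name for name, _ in ranked[:num_to_promote]]
```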