Skip to content

Commit

Permalink
Use the already-fused gate_up weights when fuse_mlp is enabled
Browse files Browse the repository at this point in the history
  • Loading branch information
jlonge4 committed Jan 21, 2025
1 parent 0b62ed7 commit 1c8a02c
Showing 1 changed file with 3 additions and 1 deletion.
4 changes: 3 additions & 1 deletion optimum/neuron/models/phi4/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,9 @@ def load_weights(self):

# Note: Automatic MLP padding is safe since zeros are *only* introduced to intermediary state
if self.neuron_config.fuse_mlp:
assert fused_gate_up.shape[0] % self.config.tp_degree == 0, f"mlp weights are not divisible by tp_degree {self.config.tp_degree}"
assert (
fused_gate_up.shape[0] % self.config.tp_degree == 0
), f"mlp weights are not divisible by tp_degree {self.config.tp_degree}"
new_layer.add_mlp_input(fused_gate_up)
if self.neuron_config.mlp_out_weight_transpose:
new_layer.add_mlp_output(
Expand Down

0 comments on commit 1c8a02c

Please sign in to comment.